From cf5f8b7db95e4613ee1f7b9b103c80d15395c1f0 Mon Sep 17 00:00:00 2001 From: Vishwas Dsouza Date: Sat, 3 Dec 2022 18:43:22 +0000 Subject: [PATCH 1/8] prov/efa: Do no ignore hints->domain_attr->name Prior to this patch, the provider would ignore the domain name in hints->domain_attr->name even if this was set. This patch fixed this issue. Signed-off-by: Vishwas Dsouza --- prov/efa/src/efa_prov_info.c | 18 ++++++++++++++++++ prov/efa/src/efa_prov_info.h | 3 +++ prov/efa/src/efa_user_info.c | 8 ++++++++ 3 files changed, 29 insertions(+) diff --git a/prov/efa/src/efa_prov_info.c b/prov/efa/src/efa_prov_info.c index 71897f54764..836fe5ca185 100644 --- a/prov/efa/src/efa_prov_info.c +++ b/prov/efa/src/efa_prov_info.c @@ -722,3 +722,21 @@ int efa_prov_info_compare_src_addr(const char *node, uint64_t flags, const struc return 0; } +/* + * @brief Compare the domain name specified via hints and match it with the + * domain name in prov_info + * + * @param info[in] info object + * @param hints[in] hints from user's call to fi_getinfo() + * + * return 1 - If the names are different + * 0 - No difference, names match. + */ +int efa_prov_info_compare_domain_name(const struct fi_info *hints, + const struct fi_info *info) +{ + if (hints && hints->domain_attr && hints->domain_attr->name) + return strcmp(info->domain_attr->name, hints->domain_attr->name); + + return 0; +} diff --git a/prov/efa/src/efa_prov_info.h b/prov/efa/src/efa_prov_info.h index b185529d2b7..e8aaae65d6e 100644 --- a/prov/efa/src/efa_prov_info.h +++ b/prov/efa/src/efa_prov_info.h @@ -45,4 +45,7 @@ int efa_prov_info_alloc_for_rxr(struct fi_info **prov_info_rxr, int efa_prov_info_compare_src_addr(const char *node, uint64_t flags, const struct fi_info *hints, const struct fi_info *fi); +int efa_prov_info_compare_domain_name(const struct fi_info *hints, + const struct fi_info *info); + #endif diff --git a/prov/efa/src/efa_user_info.c b/prov/efa/src/efa_user_info.c index 1214d9608da..e11bf446a92 100644 --- a/prov/efa/src/efa_user_info.c +++ b/prov/efa/src/efa_user_info.c @@ -218,6 +218,10 @@ int efa_user_info_get_dgram(uint32_t version, const char *node, const char *serv if (ret) continue; + ret = efa_prov_info_compare_domain_name(hints, prov_info_dgram); + if (ret) + continue; + EFA_INFO(FI_LOG_FABRIC, "found match for interface %s %s\n", node, prov_info_dgram->fabric_attr->name); if (hints) { @@ -446,6 +450,10 @@ int efa_user_info_get_rdm(uint32_t version, const char *node, if (ret) continue; + ret = efa_prov_info_compare_domain_name(hints, prov_info_rxr); + if (ret) + continue; + dupinfo = fi_dupinfo(prov_info_rxr); if (!dupinfo) { ret = -FI_ENOMEM; From 10552f39e6f3da726a9abcd0113a900e44e2738e Mon Sep 17 00:00:00 2001 From: Vishwas Dsouza Date: Sat, 3 Dec 2022 18:45:46 +0000 Subject: [PATCH 2/8] fabtests/pytest/efa: Add efa device selection test to fabtests suite This test tests the -d argument of the fabtests. All the EFA domain names are tested with this test with the corresponding fabtest. Signed-off-by: Vishwas Dsouza --- fabtests/Makefile.am | 1 + fabtests/pytest/efa/efa_common.py | 47 ++++++++++++++++++- .../pytest/efa/test_efa_device_selection.py | 39 +++++++++++++++ 3 files changed, 85 insertions(+), 2 deletions(-) create mode 100644 fabtests/pytest/efa/test_efa_device_selection.py diff --git a/fabtests/Makefile.am b/fabtests/Makefile.am index c6113d5763f..3b31cbbb9ac 100644 --- a/fabtests/Makefile.am +++ b/fabtests/Makefile.am @@ -170,6 +170,7 @@ nobase_dist_config_DATA = \ pytest/efa/test_rnr.py \ pytest/efa/test_efa_info.py \ pytest/efa/test_efa_protocol_selection.py \ + pytest/efa/test_efa_device_selection.py \ pytest/efa/test_runt.py \ pytest/efa/test_fork_support.py \ pytest/efa/test_mr.py \ diff --git a/fabtests/pytest/efa/efa_common.py b/fabtests/pytest/efa/efa_common.py index ca68a78d181..539799b7d12 100644 --- a/fabtests/pytest/efa/efa_common.py +++ b/fabtests/pytest/efa/efa_common.py @@ -22,15 +22,22 @@ def efa_run_client_server_test(cmdline_args, executable, iteration_type, test.run() @retry(retry_on_exception=is_ssh_connection_error, stop_max_attempt_number=3, wait_fixed=5000) -def efa_retrieve_hw_counter_value(hostname, hw_counter_name): +def efa_retrieve_hw_counter_value(hostname, hw_counter_name, efa_device_name=None): """ retrieve the value of EFA's hardware counter hostname: a host that has efa hw_counter_name: EFA hardware counter name. Options are: lifespan, rdma_read_resp_bytes, rdma_read_wrs,recv_wrs, rx_drops, send_bytes, tx_bytes, rdma_read_bytes, rdma_read_wr_err, recv_bytes, rx_bytes, rx_pkts, send_wrs, tx_pkts + efa_device_name: Name of the EFA device. Corresponds to the name of the EFA device's directory return: an integer that is sum of all EFA device's counter """ - command = 'ssh {} cat "/sys/class/infiniband/*/ports/*/hw_counters/{}"'.format(hostname, hw_counter_name) + + if efa_device_name: + efa_device_dir = efa_device_name + else: + efa_device_dir = '*' + + command = 'ssh {} cat "/sys/class/infiniband/{}/ports/*/hw_counters/{}"'.format(hostname, efa_device_dir, hw_counter_name) process = subprocess.run(command, shell=True, check=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding="utf-8") if process.returncode != 0: if process.stderr and has_ssh_connection_err_msg(process.stderr): @@ -70,3 +77,39 @@ def efa_retrieve_gid(hostname): return None return process.stdout.decode("utf-8").strip() + +@retry(retry_on_exception=is_ssh_connection_error, stop_max_attempt_number=3, wait_fixed=5000) +def get_efa_domain_names(server_id): + timeout = 60 + process_timed_out = False + + # This command returns a list of EFA domain names and its related info + command = "ssh {} fi_info -p efa".format(server_id) + p = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding="utf-8") + + try: + p.wait(timeout=timeout) + except subprocess.TimeoutExpired: + p.terminate() + process_timed_out = True + + assert not process_timed_out, "Process timed out" + + errors = p.stderr.readlines() + for error in errors: + error = error.strip() + if "fi_getinfo: -61" in error: + raise Exception("No EFA devices/domain names found") + + if has_ssh_connection_err_msg(error): + raise SshConnectionError() + + efa_domain_names = [] + for line in p.stdout: + line = line.strip() + if 'domain' in line: + domain_name = line.split(': ')[1] + efa_domain_names.append(domain_name) + + return efa_domain_names + diff --git a/fabtests/pytest/efa/test_efa_device_selection.py b/fabtests/pytest/efa/test_efa_device_selection.py new file mode 100644 index 00000000000..422a3195262 --- /dev/null +++ b/fabtests/pytest/efa/test_efa_device_selection.py @@ -0,0 +1,39 @@ +import pytest + + +# This test must be run in serial mode because it checks the hw counter +@pytest.mark.serial +@pytest.mark.functional +def test_efa_device_selection(cmdline_args): + from efa.efa_common import efa_retrieve_hw_counter_value, get_efa_domain_names + from common import ClientServerTest + + if cmdline_args.server_id == cmdline_args.client_id: + pytest.skip("EFA device selection test requires 2 nodes") + return + + efa_domain_names = get_efa_domain_names(cmdline_args.server_id) + + for efa_domain_name in efa_domain_names: + if '-rdm' in efa_domain_name: + assert not('-dgrm') in efa_domain_name + fabtest_opts = "fi_rdm_pingpong" + elif '-dgrm' in efa_domain_name: + assert not('-rdm') in efa_domain_name + fabtest_opts = "fi_dgram_pingpong -k" + + efa_device_name = efa_domain_name.split('-')[0] + + server_tx_bytes_before_test = efa_retrieve_hw_counter_value(cmdline_args.server_id, "tx_bytes", efa_device_name) + client_tx_bytes_before_test = efa_retrieve_hw_counter_value(cmdline_args.client_id, "tx_bytes", efa_device_name) + + executable = "{} -d {}".format(fabtest_opts, efa_domain_name) + test = ClientServerTest(cmdline_args, executable, message_size="1000", timeout=300) + test.run() + + server_tx_bytes_after_test = efa_retrieve_hw_counter_value(cmdline_args.server_id, "tx_bytes", efa_device_name) + client_tx_bytes_after_test = efa_retrieve_hw_counter_value(cmdline_args.client_id, "tx_bytes", efa_device_name) + + # Verify EFA traffic + assert server_tx_bytes_before_test < server_tx_bytes_after_test + assert client_tx_bytes_before_test < client_tx_bytes_after_test From bcd6593dbb5dbdcb0f491b244fdc195120d7c835 Mon Sep 17 00:00:00 2001 From: Wei Zhang Date: Wed, 7 Dec 2022 15:46:10 +0000 Subject: [PATCH 3/8] prov/efa: add a FI_WARN about endpoint address after it is created This patch added a call to FI_WARN to print endpoint's address and libfabric version after endpoint is created. Signed-off-by: Wei Zhang --- prov/efa/src/rxr/rxr_ep.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/prov/efa/src/rxr/rxr_ep.c b/prov/efa/src/rxr/rxr_ep.c index 7ffad730b7c..a08a2beb6b1 100644 --- a/prov/efa/src/rxr/rxr_ep.c +++ b/prov/efa/src/rxr/rxr_ep.c @@ -957,8 +957,8 @@ static int rxr_ep_ctrl(struct fid *fid, int command, void *arg) ssize_t ret; struct rxr_ep *ep; struct efa_domain *efa_domain; - char shm_ep_name[EFA_SHM_NAME_MAX]; - size_t shm_ep_name_len; + char shm_ep_name[EFA_SHM_NAME_MAX], ep_addr_str[OFI_ADDRSTRLEN]; + size_t shm_ep_name_len, ep_addr_strlen; switch (command) { case FI_ENABLE: @@ -985,6 +985,11 @@ static int rxr_ep_ctrl(struct fid *fid, int command, void *arg) FI_DBG(&rxr_prov, FI_LOG_EP_CTRL, "core_addrlen = %ld\n", ep->core_addrlen); + ep_addr_strlen = sizeof(ep_addr_str); + rxr_ep_raw_addr_str(ep, ep_addr_str, &ep_addr_strlen); + FI_WARN(&rxr_prov, FI_LOG_EP_CTRL, "libfabric %s efa endpoint created! address: %s\n", + fi_tostr("1", FI_TYPE_VERSION), ep_addr_str); + /* Enable shm provider endpoint & post recv buff. * Once core ep enabled, 18 bytes efa_addr (16 bytes raw + 2 bytes qpn) is set. * We convert the address to 'gid_qpn' format, and set it as shm ep name, so From 812aac61552ccab5c5395cfa7cb3e169a3e3a607 Mon Sep 17 00:00:00 2001 From: Tomasz Gromadzki Date: Mon, 28 Nov 2022 09:33:43 +0100 Subject: [PATCH 4/8] prov/peer: Introducing peer "provider" with peer_cq to begin with include/ofi_peer.h file along with prov/peer directory defines space where peer-related common code is placed. peer_cq is the first structure that has been defined. It is based on coll_cq structure. coll provider uses peer_cq instead of removed coll_cq. See man/fi_peer.3.md Signed-off-by: Tomasz Gromadzki --- Makefile.am | 4 +- include/ofi_peer.h | 47 ++++++++++++++ prov/coll/src/coll.h | 7 +-- prov/coll/src/coll_coll.c | 10 ++- prov/coll/src/coll_domain.c | 2 +- prov/peer/src/peer.h | 51 +++++++++++++++ .../src/coll_cq.c => peer/src/peer_cq.c} | 63 +++++++++++++------ 7 files changed, 152 insertions(+), 32 deletions(-) create mode 100644 include/ofi_peer.h create mode 100644 prov/peer/src/peer.h rename prov/{coll/src/coll_cq.c => peer/src/peer_cq.c} (60%) diff --git a/Makefile.am b/Makefile.am index deb7ce6f013..900007540d2 100644 --- a/Makefile.am +++ b/Makefile.am @@ -85,11 +85,12 @@ common_srcs = \ prov/util/src/rocr_mem_monitor.c \ prov/util/src/ze_mem_monitor.c \ prov/util/src/cuda_ipc_monitor.c \ + prov/peer/src/peer_cq.c \ + prov/peer/src/peer.h \ prov/coll/src/coll_attr.c \ prov/coll/src/coll_av.c \ prov/coll/src/coll_av_set.c \ prov/coll/src/coll_coll.c \ - prov/coll/src/coll_cq.c \ prov/coll/src/coll_domain.c \ prov/coll/src/coll_ep.c \ prov/coll/src/coll_eq.c \ @@ -189,6 +190,7 @@ src_libfabric_la_SOURCES = \ include/ofi_net.h \ include/ofi_perf.h \ include/ofi_coll.h \ + include/ofi_peer.h \ include/fasthash.h \ include/rbtree.h \ include/uthash.h \ diff --git a/include/ofi_peer.h b/include/ofi_peer.h new file mode 100644 index 00000000000..72f1a3c500f --- /dev/null +++ b/include/ofi_peer.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2022 Intel Corporation, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _OFI_PEER_H_ +#define _OFI_PEER_H_ + +#include + +#include + +int ofi_peer_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr, + struct fid_cq **cq_fid, void *context); + +ssize_t ofi_peer_cq_write(struct fid_cq *cq, void *context, uint64_t flags, + size_t len, void *buf, uint64_t data, uint64_t tag, + fi_addr_t src); + +#endif /* _OFI_PEER_H_ */ diff --git a/prov/coll/src/coll.h b/prov/coll/src/coll.h index 1fcf6db1e25..cd0d98d00ab 100644 --- a/prov/coll/src/coll.h +++ b/prov/coll/src/coll.h @@ -60,6 +60,8 @@ #include #include #include +#include +#include #define COLL_IOV_LIMIT 4 #define COLL_MR_MODES (OFI_MR_BASIC_MAP | FI_MR_LOCAL) @@ -81,11 +83,6 @@ struct coll_av { struct fid_peer_av *peer_av; }; -struct coll_cq { - struct util_cq util_cq; - struct fid_peer_cq *peer_cq; -}; - struct coll_eq { struct util_eq util_eq; struct fid_eq *peer_eq; diff --git a/prov/coll/src/coll_coll.c b/prov/coll/src/coll_coll.c index bd03e03d31e..42e720ec03d 100644 --- a/prov/coll/src/coll_coll.c +++ b/prov/coll/src/coll_coll.c @@ -31,7 +31,6 @@ */ #include "coll.h" -#include "ofi_coll.h" static uint64_t coll_form_tag(uint32_t coll_id, uint32_t rank) { @@ -721,13 +720,12 @@ void coll_join_comp(struct util_coll_operation *coll_op) void coll_collective_comp(struct util_coll_operation *coll_op) { struct coll_ep *ep; - struct coll_cq *cq; - + int ret; ep = container_of(coll_op->ep, struct coll_ep, util_ep.ep_fid); - cq = container_of(ep->util_ep.tx_cq, struct coll_cq, util_cq); - if (cq->peer_cq->owner_ops->write(cq->peer_cq, coll_op->context, - FI_COLLECTIVE, 0, 0, 0, 0, 0)) + ret = ofi_peer_cq_write(&ep->util_ep.tx_cq->cq_fid, coll_op->context, + FI_COLLECTIVE, 0, 0, 0, 0, 0); + if (ret) FI_WARN(ep->util_ep.domain->fabric->prov, FI_LOG_DOMAIN, "collective - cq write failed\n"); diff --git a/prov/coll/src/coll_domain.c b/prov/coll/src/coll_domain.c index fbf1f2272a3..3484f8019a1 100644 --- a/prov/coll/src/coll_domain.c +++ b/prov/coll/src/coll_domain.c @@ -35,7 +35,7 @@ static struct fi_ops_domain coll_domain_ops = { .size = sizeof(struct fi_ops_domain), .av_open = coll_av_open, - .cq_open = coll_cq_open, + .cq_open = ofi_peer_cq_open, .endpoint = coll_endpoint, .scalable_ep = fi_no_scalable_ep, .cntr_open = fi_no_cntr_open, diff --git a/prov/peer/src/peer.h b/prov/peer/src/peer.h new file mode 100644 index 00000000000..645360346d0 --- /dev/null +++ b/prov/peer/src/peer.h @@ -0,0 +1,51 @@ + +/* + * Copyright (c) 2022 Intel Corporation, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _PEER_H_ +#define _PEER_H_ + +#if HAVE_CONFIG_H +#include +#endif + +#include +#include +#include + +struct peer_cq { + struct util_cq util_cq; + struct fid_peer_cq *peer_cq; +}; + +#endif /* _PEER_H_ */ + diff --git a/prov/coll/src/coll_cq.c b/prov/peer/src/peer_cq.c similarity index 60% rename from prov/coll/src/coll_cq.c rename to prov/peer/src/peer_cq.c index 3c279b5113e..dbd9b6bace9 100644 --- a/prov/coll/src/coll_cq.c +++ b/prov/peer/src/peer_cq.c @@ -30,14 +30,14 @@ * SOFTWARE. */ -#include "coll.h" +#include "peer.h" -static int coll_cq_close(struct fid *fid) +static int peer_cq_close(struct fid *fid) { - struct coll_cq *cq; + struct peer_cq *cq; int ret; - cq = container_of(fid, struct coll_cq, util_cq.cq_fid.fid); + cq = container_of(fid, struct peer_cq, util_cq.cq_fid.fid); ret = ofi_cq_cleanup(&cq->util_cq); if (ret) @@ -47,15 +47,15 @@ static int coll_cq_close(struct fid *fid) return 0; } -static struct fi_ops coll_cq_fi_ops = { +static struct fi_ops peer_cq_fi_ops = { .size = sizeof(struct fi_ops), - .close = coll_cq_close, + .close = peer_cq_close, .bind = fi_no_bind, .control = fi_no_control, .ops_open = fi_no_ops_open, }; -static struct fi_ops_cq coll_cq_ops = { +static struct fi_ops_cq peer_cq_ops = { .size = sizeof(struct fi_ops_cq), .read = fi_no_cq_read, .readfrom = fi_no_cq_readfrom, @@ -66,21 +66,27 @@ static struct fi_ops_cq coll_cq_ops = { .strerror = fi_no_cq_strerror, }; -int coll_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr, - struct fid_cq **cq_fid, void *context) +int peer_cq_init(struct fid_domain *domain, struct fi_cq_attr *attr, + struct fid_cq **cq_fid, struct fi_peer_cq_context *peer_context) { - struct coll_cq *cq; - struct fi_peer_cq_context *peer_context = context; + struct peer_cq *cq; int ret; + const struct util_domain *util_domain; + const struct fi_provider* provider; + + util_domain = container_of(domain, struct util_domain, domain_fid.fid); + provider = util_domain->fabric->prov; + + if (!attr || !(attr->flags & FI_PEER)) { - FI_WARN(&coll_prov, FI_LOG_CORE, "FI_PEER flag required\n"); - return -EINVAL; + FI_WARN(provider, FI_LOG_CORE, "FI_PEER flag required\n"); + return -FI_EINVAL; } if (!peer_context || peer_context->size < sizeof(*peer_context)) { - FI_WARN(&coll_prov, FI_LOG_CORE, "invalid peer CQ context\n"); - return -EINVAL; + FI_WARN(provider, FI_LOG_CORE, "invalid peer CQ context\n"); + return -FI_EINVAL; } cq = calloc(1, sizeof(*cq)); @@ -89,17 +95,36 @@ int coll_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr, cq->peer_cq = peer_context->cq; - ret = ofi_cq_init(&coll_prov, domain, attr, &cq->util_cq, &ofi_cq_progress, - context); + ret = ofi_cq_init(provider, domain, attr, &cq->util_cq, + &ofi_cq_progress, NULL); if (ret) goto err; *cq_fid = &cq->util_cq.cq_fid; - (*cq_fid)->fid.ops = &coll_cq_fi_ops; - (*cq_fid)->ops = &coll_cq_ops; + (*cq_fid)->fid.ops = &peer_cq_fi_ops; + (*cq_fid)->ops = &peer_cq_ops; return 0; err: free(cq); return ret; } + +int ofi_peer_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr, + struct fid_cq **cq_fid, void *context) +{ + struct fi_peer_cq_context *peer_context = context; + return peer_cq_init(domain, attr, cq_fid, peer_context); +} + +ssize_t ofi_peer_cq_write(struct fid_cq *cq_fid, void *context, uint64_t flags, + size_t len, void *buf, uint64_t data, uint64_t tag, + fi_addr_t src) +{ + struct peer_cq *cq; + + cq = container_of(cq_fid, struct peer_cq, util_cq.cq_fid); + + return cq->peer_cq->owner_ops->write(cq->peer_cq, context, + flags, len, buf, data, tag, src); +} From 1287ca8bfe357e0719c093fd7df6f5baff7a003f Mon Sep 17 00:00:00 2001 From: Tomasz Gromadzki Date: Fri, 2 Dec 2022 23:00:33 +0100 Subject: [PATCH 5/8] prov/peer: add provider parameter to peer_cq_init Adding struct fi_provider* as the first parameter to peer_cq_init() makes it similar to ofi_cq_init(). Signed-off-by: Tomasz Gromadzki --- include/ofi_peer.h | 2 -- prov/peer/src/peer_cq.c | 22 +++++++++++----------- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/include/ofi_peer.h b/include/ofi_peer.h index 72f1a3c500f..77c4332a39d 100644 --- a/include/ofi_peer.h +++ b/include/ofi_peer.h @@ -33,8 +33,6 @@ #ifndef _OFI_PEER_H_ #define _OFI_PEER_H_ -#include - #include int ofi_peer_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr, diff --git a/prov/peer/src/peer_cq.c b/prov/peer/src/peer_cq.c index dbd9b6bace9..30a780a0a5e 100644 --- a/prov/peer/src/peer_cq.c +++ b/prov/peer/src/peer_cq.c @@ -66,19 +66,13 @@ static struct fi_ops_cq peer_cq_ops = { .strerror = fi_no_cq_strerror, }; -int peer_cq_init(struct fid_domain *domain, struct fi_cq_attr *attr, - struct fid_cq **cq_fid, struct fi_peer_cq_context *peer_context) +static int peer_cq_init(const struct fi_provider* provider, + struct fid_domain *domain, struct fi_cq_attr *attr, + struct fid_cq **cq_fid, struct fi_peer_cq_context *peer_context) { struct peer_cq *cq; int ret; - const struct util_domain *util_domain; - const struct fi_provider* provider; - - util_domain = container_of(domain, struct util_domain, domain_fid.fid); - provider = util_domain->fabric->prov; - - if (!attr || !(attr->flags & FI_PEER)) { FI_WARN(provider, FI_LOG_CORE, "FI_PEER flag required\n"); return -FI_EINVAL; @@ -113,8 +107,14 @@ int peer_cq_init(struct fid_domain *domain, struct fi_cq_attr *attr, int ofi_peer_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr, struct fid_cq **cq_fid, void *context) { - struct fi_peer_cq_context *peer_context = context; - return peer_cq_init(domain, attr, cq_fid, peer_context); + struct util_domain *util_domain; + struct fi_peer_cq_context *peer_context; + + util_domain = container_of(domain, struct util_domain, domain_fid.fid); + peer_context = context; + + return peer_cq_init(util_domain->fabric->prov, domain, attr, + cq_fid, peer_context); } ssize_t ofi_peer_cq_write(struct fid_cq *cq_fid, void *context, uint64_t flags, From 234140aadfbcff770c0b2baaf18482081d6dea9b Mon Sep 17 00:00:00 2001 From: Tomasz Gromadzki Date: Tue, 6 Dec 2022 09:55:51 +0100 Subject: [PATCH 6/8] prov/peer: Minimalist version of peer_cq struct peer_cq is not used at all. All operation are based on fid_peer_cq. There are no real CQ on util_coll. Only fid_peer_cq is binded to util_coll_ep. Signed-off-by: Tomasz Gromadzki --- prov/coll/src/coll.h | 6 ++++++ prov/coll/src/coll_coll.c | 6 +++++- prov/coll/src/coll_ep.c | 26 +++++++++++++++++++++++++- prov/peer/src/peer_cq.c | 13 ++++++++++++- prov/rxm/src/rxm_ep.c | 5 ++--- 5 files changed, 50 insertions(+), 6 deletions(-) diff --git a/prov/coll/src/coll.h b/prov/coll/src/coll.h index cd0d98d00ab..bf554a79ccf 100644 --- a/prov/coll/src/coll.h +++ b/prov/coll/src/coll.h @@ -98,6 +98,12 @@ struct coll_ep { */ struct fi_info *peer_info; struct fid_ep *peer_ep; + + /* + * local pointers to peer_cq - configured via bind() operation + */ + struct fid_peer_cq *tx_peer_cq; + struct fid_peer_cq *rx_peer_cq; }; struct coll_mr { diff --git a/prov/coll/src/coll_coll.c b/prov/coll/src/coll_coll.c index 42e720ec03d..62b3636186c 100644 --- a/prov/coll/src/coll_coll.c +++ b/prov/coll/src/coll_coll.c @@ -723,7 +723,11 @@ void coll_collective_comp(struct util_coll_operation *coll_op) int ret; ep = container_of(coll_op->ep, struct coll_ep, util_ep.ep_fid); - ret = ofi_peer_cq_write(&ep->util_ep.tx_cq->cq_fid, coll_op->context, + /* + ret = ofi_peer_cq_write(&ep->util_ep.tx_cq->cq_fid, coll_op->context, + FI_COLLECTIVE, 0, 0, 0, 0, 0); + */ + ret = ep->tx_peer_cq->owner_ops->write(ep->tx_peer_cq, coll_op->context, FI_COLLECTIVE, 0, 0, 0, 0, 0); if (ret) FI_WARN(ep->util_ep.domain->fabric->prov, FI_LOG_DOMAIN, diff --git a/prov/coll/src/coll_ep.c b/prov/coll/src/coll_ep.c index aa76c2711c4..988040c18e0 100644 --- a/prov/coll/src/coll_ep.c +++ b/prov/coll/src/coll_ep.c @@ -79,6 +79,30 @@ static struct fi_ops_collective coll_ops_collective = { .msg = fi_coll_no_msg, }; +static int +coll_ep_bind(struct fid *ep_fid, struct fid *bfid, uint64_t flags) +{ + struct coll_ep *ep; + struct fid_peer_cq *peer_cq; + + if (bfid->fclass != FI_CLASS_PEER_CQ) { + return ofi_ep_fid_bind(ep_fid, bfid, flags); + } + if (!(flags & (FI_TRANSMIT | FI_RECV))) { + return -FI_EINVAL; + } + + ep = container_of(ep_fid, struct coll_ep, util_ep.ep_fid.fid); + peer_cq = container_of(bfid, struct fid_peer_cq, fid); + + if (flags & FI_TRANSMIT) + ep->tx_peer_cq = peer_cq; + if (flags & FI_RECV) + ep->rx_peer_cq = peer_cq; + return FI_SUCCESS; +} + + static int coll_ep_close(struct fid *fid) { struct coll_ep *ep; @@ -103,7 +127,7 @@ static int coll_ep_ctrl(struct fid *fid, int command, void *arg) static struct fi_ops coll_ep_fi_ops = { .size = sizeof(struct fi_ops), .close = coll_ep_close, - .bind = ofi_ep_fid_bind, + .bind = coll_ep_bind, .control = coll_ep_ctrl, .ops_open = fi_no_ops_open, }; diff --git a/prov/peer/src/peer_cq.c b/prov/peer/src/peer_cq.c index 30a780a0a5e..f41c95d2381 100644 --- a/prov/peer/src/peer_cq.c +++ b/prov/peer/src/peer_cq.c @@ -113,7 +113,7 @@ int ofi_peer_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr, util_domain = container_of(domain, struct util_domain, domain_fid.fid); peer_context = context; - return peer_cq_init(util_domain->fabric->prov, domain, attr, + return peer_cq_init(util_domain->fabric->prov, domain, attr, cq_fid, peer_context); } @@ -128,3 +128,14 @@ ssize_t ofi_peer_cq_write(struct fid_cq *cq_fid, void *context, uint64_t flags, return cq->peer_cq->owner_ops->write(cq->peer_cq, context, flags, len, buf, data, tag, src); } + + +ssize_t ofi_peer_cq_writeerr(struct fid_cq *cq_fid, + const struct fi_cq_err_entry *err_entry) +{ + struct peer_cq *cq; + + cq = container_of(cq_fid, struct peer_cq, util_cq.cq_fid); + + return cq->peer_cq->owner_ops->writeerr(cq->peer_cq, err_entry); +} diff --git a/prov/rxm/src/rxm_ep.c b/prov/rxm/src/rxm_ep.c index 7f60a124c15..61168f283d3 100644 --- a/prov/rxm/src/rxm_ep.c +++ b/prov/rxm/src/rxm_ep.c @@ -1624,9 +1624,8 @@ static int rxm_ep_bind(struct fid *ep_fid, struct fid *bfid, uint64_t flags) case FI_CLASS_CQ: rxm_cq = container_of(bfid, struct rxm_cq, util_cq.cq_fid.fid); if (rxm_ep->util_coll_ep && rxm_cq->util_coll_cq) { - ret = ofi_ep_fid_bind(&rxm_ep->util_coll_ep->fid, - &rxm_cq->util_coll_cq->fid, - flags); + ret = fi_ep_bind(rxm_ep->util_coll_ep, + &rxm_cq->peer_cq.fid, flags); if (ret) retv = ret; } From 333fb0136d00e79fc0fbbdc71b0b2dea98c50f99 Mon Sep 17 00:00:00 2001 From: Tomasz Gromadzki Date: Tue, 6 Dec 2022 12:10:39 +0100 Subject: [PATCH 7/8] prov/rxm: remove util_coll_cq from rxm util_coll_cq is no longer needed as we use only fid_peer_cq to link util_coll_ep and rxm_ep.util_cq directly with help of bind() operation. Signed-off-by: Tomasz Gromadzki --- prov/coll/src/coll_coll.c | 4 ---- prov/rxm/src/rxm.h | 1 - prov/rxm/src/rxm_cq.c | 12 ------------ prov/rxm/src/rxm_ep.c | 2 +- 4 files changed, 1 insertion(+), 18 deletions(-) diff --git a/prov/coll/src/coll_coll.c b/prov/coll/src/coll_coll.c index 62b3636186c..90fe3bf5b0b 100644 --- a/prov/coll/src/coll_coll.c +++ b/prov/coll/src/coll_coll.c @@ -723,10 +723,6 @@ void coll_collective_comp(struct util_coll_operation *coll_op) int ret; ep = container_of(coll_op->ep, struct coll_ep, util_ep.ep_fid); - /* - ret = ofi_peer_cq_write(&ep->util_ep.tx_cq->cq_fid, coll_op->context, - FI_COLLECTIVE, 0, 0, 0, 0, 0); - */ ret = ep->tx_peer_cq->owner_ops->write(ep->tx_peer_cq, coll_op->context, FI_COLLECTIVE, 0, 0, 0, 0, 0); if (ret) diff --git a/prov/rxm/src/rxm.h b/prov/rxm/src/rxm.h index 0fe48082a5f..a663cfb8f10 100644 --- a/prov/rxm/src/rxm.h +++ b/prov/rxm/src/rxm.h @@ -280,7 +280,6 @@ struct rxm_domain { struct rxm_cq { struct util_cq util_cq; struct fid_peer_cq peer_cq; - struct fid_cq *util_coll_cq; struct fid_cq *offload_coll_cq; }; diff --git a/prov/rxm/src/rxm_cq.c b/prov/rxm/src/rxm_cq.c index 24c4dcd354d..cbf742e3ea3 100644 --- a/prov/rxm/src/rxm_cq.c +++ b/prov/rxm/src/rxm_cq.c @@ -2071,9 +2071,6 @@ static int rxm_cq_close(struct fid *fid) if (rxm_cq->offload_coll_cq) fi_close(&rxm_cq->offload_coll_cq->fid); - if (rxm_cq->util_coll_cq) - fi_close(&rxm_cq->util_coll_cq->fid); - ret = ofi_cq_cleanup(&rxm_cq->util_cq); if (ret) retv = ret; @@ -2145,13 +2142,6 @@ int rxm_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr, rxm_cq->peer_cq.owner_ops = &rxm_cq_owner_ops; peer_cq_context.cq = &rxm_cq->peer_cq; - if (rxm_domain->util_coll_domain) { - ret = fi_cq_open(rxm_domain->util_coll_domain, &peer_cq_attr, - &rxm_cq->util_coll_cq, &peer_cq_context); - if (ret) - goto err2; - } - if (rxm_domain->offload_coll_domain) { ret = fi_cq_open(rxm_domain->offload_coll_domain, &peer_cq_attr, &rxm_cq->offload_coll_cq, &peer_cq_context); @@ -2166,8 +2156,6 @@ int rxm_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr, return 0; err2: - if (rxm_cq->util_coll_cq) - fi_close(&rxm_cq->util_coll_cq->fid); ofi_cq_cleanup(&rxm_cq->util_cq); err1: free(rxm_cq); diff --git a/prov/rxm/src/rxm_ep.c b/prov/rxm/src/rxm_ep.c index 61168f283d3..b37b93e5ec5 100644 --- a/prov/rxm/src/rxm_ep.c +++ b/prov/rxm/src/rxm_ep.c @@ -1623,7 +1623,7 @@ static int rxm_ep_bind(struct fid *ep_fid, struct fid *bfid, uint64_t flags) case FI_CLASS_CQ: rxm_cq = container_of(bfid, struct rxm_cq, util_cq.cq_fid.fid); - if (rxm_ep->util_coll_ep && rxm_cq->util_coll_cq) { + if (rxm_ep->util_coll_ep) { ret = fi_ep_bind(rxm_ep->util_coll_ep, &rxm_cq->peer_cq.fid, flags); if (ret) From 867d72caa11db5e544f4d19cd2818cf1fbcc13df Mon Sep 17 00:00:00 2001 From: Tomasz Gromadzki Date: Tue, 6 Dec 2022 12:31:36 +0100 Subject: [PATCH 8/8] prov/peer: Remove peer_cq implementation struct peer_cq is no longer needed. Signed-off-by: Tomasz Gromadzki --- include/ofi_peer.h | 7 --- prov/coll/src/coll_domain.c | 2 +- prov/peer/src/peer_cq.c | 108 ------------------------------------ 3 files changed, 1 insertion(+), 116 deletions(-) diff --git a/include/ofi_peer.h b/include/ofi_peer.h index 77c4332a39d..e8a8549efad 100644 --- a/include/ofi_peer.h +++ b/include/ofi_peer.h @@ -35,11 +35,4 @@ #include -int ofi_peer_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr, - struct fid_cq **cq_fid, void *context); - -ssize_t ofi_peer_cq_write(struct fid_cq *cq, void *context, uint64_t flags, - size_t len, void *buf, uint64_t data, uint64_t tag, - fi_addr_t src); - #endif /* _OFI_PEER_H_ */ diff --git a/prov/coll/src/coll_domain.c b/prov/coll/src/coll_domain.c index 3484f8019a1..3df60a4eb47 100644 --- a/prov/coll/src/coll_domain.c +++ b/prov/coll/src/coll_domain.c @@ -35,7 +35,7 @@ static struct fi_ops_domain coll_domain_ops = { .size = sizeof(struct fi_ops_domain), .av_open = coll_av_open, - .cq_open = ofi_peer_cq_open, + .cq_open = fi_no_cq_open, .endpoint = coll_endpoint, .scalable_ep = fi_no_scalable_ep, .cntr_open = fi_no_cntr_open, diff --git a/prov/peer/src/peer_cq.c b/prov/peer/src/peer_cq.c index f41c95d2381..052361de85b 100644 --- a/prov/peer/src/peer_cq.c +++ b/prov/peer/src/peer_cq.c @@ -31,111 +31,3 @@ */ #include "peer.h" - -static int peer_cq_close(struct fid *fid) -{ - struct peer_cq *cq; - int ret; - - cq = container_of(fid, struct peer_cq, util_cq.cq_fid.fid); - - ret = ofi_cq_cleanup(&cq->util_cq); - if (ret) - return ret; - - free(cq); - return 0; -} - -static struct fi_ops peer_cq_fi_ops = { - .size = sizeof(struct fi_ops), - .close = peer_cq_close, - .bind = fi_no_bind, - .control = fi_no_control, - .ops_open = fi_no_ops_open, -}; - -static struct fi_ops_cq peer_cq_ops = { - .size = sizeof(struct fi_ops_cq), - .read = fi_no_cq_read, - .readfrom = fi_no_cq_readfrom, - .readerr = fi_no_cq_readerr, - .sread = fi_no_cq_sread, - .sreadfrom = fi_no_cq_sreadfrom, - .signal = fi_no_cq_signal, - .strerror = fi_no_cq_strerror, -}; - -static int peer_cq_init(const struct fi_provider* provider, - struct fid_domain *domain, struct fi_cq_attr *attr, - struct fid_cq **cq_fid, struct fi_peer_cq_context *peer_context) -{ - struct peer_cq *cq; - int ret; - - if (!attr || !(attr->flags & FI_PEER)) { - FI_WARN(provider, FI_LOG_CORE, "FI_PEER flag required\n"); - return -FI_EINVAL; - } - - if (!peer_context || peer_context->size < sizeof(*peer_context)) { - FI_WARN(provider, FI_LOG_CORE, "invalid peer CQ context\n"); - return -FI_EINVAL; - } - - cq = calloc(1, sizeof(*cq)); - if (!cq) - return -FI_ENOMEM; - - cq->peer_cq = peer_context->cq; - - ret = ofi_cq_init(provider, domain, attr, &cq->util_cq, - &ofi_cq_progress, NULL); - if (ret) - goto err; - - *cq_fid = &cq->util_cq.cq_fid; - (*cq_fid)->fid.ops = &peer_cq_fi_ops; - (*cq_fid)->ops = &peer_cq_ops; - return 0; - -err: - free(cq); - return ret; -} - -int ofi_peer_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr, - struct fid_cq **cq_fid, void *context) -{ - struct util_domain *util_domain; - struct fi_peer_cq_context *peer_context; - - util_domain = container_of(domain, struct util_domain, domain_fid.fid); - peer_context = context; - - return peer_cq_init(util_domain->fabric->prov, domain, attr, - cq_fid, peer_context); -} - -ssize_t ofi_peer_cq_write(struct fid_cq *cq_fid, void *context, uint64_t flags, - size_t len, void *buf, uint64_t data, uint64_t tag, - fi_addr_t src) -{ - struct peer_cq *cq; - - cq = container_of(cq_fid, struct peer_cq, util_cq.cq_fid); - - return cq->peer_cq->owner_ops->write(cq->peer_cq, context, - flags, len, buf, data, tag, src); -} - - -ssize_t ofi_peer_cq_writeerr(struct fid_cq *cq_fid, - const struct fi_cq_err_entry *err_entry) -{ - struct peer_cq *cq; - - cq = container_of(cq_fid, struct peer_cq, util_cq.cq_fid); - - return cq->peer_cq->owner_ops->writeerr(cq->peer_cq, err_entry); -}