diff --git a/Makefile.am b/Makefile.am index deb7ce6f013..900007540d2 100644 --- a/Makefile.am +++ b/Makefile.am @@ -85,11 +85,12 @@ common_srcs = \ prov/util/src/rocr_mem_monitor.c \ prov/util/src/ze_mem_monitor.c \ prov/util/src/cuda_ipc_monitor.c \ + prov/peer/src/peer_cq.c \ + prov/peer/src/peer.h \ prov/coll/src/coll_attr.c \ prov/coll/src/coll_av.c \ prov/coll/src/coll_av_set.c \ prov/coll/src/coll_coll.c \ - prov/coll/src/coll_cq.c \ prov/coll/src/coll_domain.c \ prov/coll/src/coll_ep.c \ prov/coll/src/coll_eq.c \ @@ -189,6 +190,7 @@ src_libfabric_la_SOURCES = \ include/ofi_net.h \ include/ofi_perf.h \ include/ofi_coll.h \ + include/ofi_peer.h \ include/fasthash.h \ include/rbtree.h \ include/uthash.h \ diff --git a/fabtests/Makefile.am b/fabtests/Makefile.am index c6113d5763f..3b31cbbb9ac 100644 --- a/fabtests/Makefile.am +++ b/fabtests/Makefile.am @@ -170,6 +170,7 @@ nobase_dist_config_DATA = \ pytest/efa/test_rnr.py \ pytest/efa/test_efa_info.py \ pytest/efa/test_efa_protocol_selection.py \ + pytest/efa/test_efa_device_selection.py \ pytest/efa/test_runt.py \ pytest/efa/test_fork_support.py \ pytest/efa/test_mr.py \ diff --git a/fabtests/pytest/efa/efa_common.py b/fabtests/pytest/efa/efa_common.py index ca68a78d181..539799b7d12 100644 --- a/fabtests/pytest/efa/efa_common.py +++ b/fabtests/pytest/efa/efa_common.py @@ -22,15 +22,22 @@ def efa_run_client_server_test(cmdline_args, executable, iteration_type, test.run() @retry(retry_on_exception=is_ssh_connection_error, stop_max_attempt_number=3, wait_fixed=5000) -def efa_retrieve_hw_counter_value(hostname, hw_counter_name): +def efa_retrieve_hw_counter_value(hostname, hw_counter_name, efa_device_name=None): """ retrieve the value of EFA's hardware counter hostname: a host that has efa hw_counter_name: EFA hardware counter name. Options are: lifespan, rdma_read_resp_bytes, rdma_read_wrs,recv_wrs, rx_drops, send_bytes, tx_bytes, rdma_read_bytes, rdma_read_wr_err, recv_bytes, rx_bytes, rx_pkts, send_wrs, tx_pkts + efa_device_name: Name of the EFA device. Corresponds to the name of the EFA device's directory return: an integer that is sum of all EFA device's counter """ - command = 'ssh {} cat "/sys/class/infiniband/*/ports/*/hw_counters/{}"'.format(hostname, hw_counter_name) + + if efa_device_name: + efa_device_dir = efa_device_name + else: + efa_device_dir = '*' + + command = 'ssh {} cat "/sys/class/infiniband/{}/ports/*/hw_counters/{}"'.format(hostname, efa_device_dir, hw_counter_name) process = subprocess.run(command, shell=True, check=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding="utf-8") if process.returncode != 0: if process.stderr and has_ssh_connection_err_msg(process.stderr): @@ -70,3 +77,39 @@ def efa_retrieve_gid(hostname): return None return process.stdout.decode("utf-8").strip() + +@retry(retry_on_exception=is_ssh_connection_error, stop_max_attempt_number=3, wait_fixed=5000) +def get_efa_domain_names(server_id): + timeout = 60 + process_timed_out = False + + # This command returns a list of EFA domain names and its related info + command = "ssh {} fi_info -p efa".format(server_id) + p = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding="utf-8") + + try: + p.wait(timeout=timeout) + except subprocess.TimeoutExpired: + p.terminate() + process_timed_out = True + + assert not process_timed_out, "Process timed out" + + errors = p.stderr.readlines() + for error in errors: + error = error.strip() + if "fi_getinfo: -61" in error: + raise Exception("No EFA devices/domain names found") + + if has_ssh_connection_err_msg(error): + raise SshConnectionError() + + efa_domain_names = [] + for line in p.stdout: + line = line.strip() + if 'domain' in line: + domain_name = line.split(': ')[1] + efa_domain_names.append(domain_name) + + return efa_domain_names + diff --git a/fabtests/pytest/efa/test_efa_device_selection.py b/fabtests/pytest/efa/test_efa_device_selection.py new file mode 100644 index 00000000000..422a3195262 --- /dev/null +++ b/fabtests/pytest/efa/test_efa_device_selection.py @@ -0,0 +1,39 @@ +import pytest + + +# This test must be run in serial mode because it checks the hw counter +@pytest.mark.serial +@pytest.mark.functional +def test_efa_device_selection(cmdline_args): + from efa.efa_common import efa_retrieve_hw_counter_value, get_efa_domain_names + from common import ClientServerTest + + if cmdline_args.server_id == cmdline_args.client_id: + pytest.skip("EFA device selection test requires 2 nodes") + return + + efa_domain_names = get_efa_domain_names(cmdline_args.server_id) + + for efa_domain_name in efa_domain_names: + if '-rdm' in efa_domain_name: + assert not('-dgrm') in efa_domain_name + fabtest_opts = "fi_rdm_pingpong" + elif '-dgrm' in efa_domain_name: + assert not('-rdm') in efa_domain_name + fabtest_opts = "fi_dgram_pingpong -k" + + efa_device_name = efa_domain_name.split('-')[0] + + server_tx_bytes_before_test = efa_retrieve_hw_counter_value(cmdline_args.server_id, "tx_bytes", efa_device_name) + client_tx_bytes_before_test = efa_retrieve_hw_counter_value(cmdline_args.client_id, "tx_bytes", efa_device_name) + + executable = "{} -d {}".format(fabtest_opts, efa_domain_name) + test = ClientServerTest(cmdline_args, executable, message_size="1000", timeout=300) + test.run() + + server_tx_bytes_after_test = efa_retrieve_hw_counter_value(cmdline_args.server_id, "tx_bytes", efa_device_name) + client_tx_bytes_after_test = efa_retrieve_hw_counter_value(cmdline_args.client_id, "tx_bytes", efa_device_name) + + # Verify EFA traffic + assert server_tx_bytes_before_test < server_tx_bytes_after_test + assert client_tx_bytes_before_test < client_tx_bytes_after_test diff --git a/include/ofi_peer.h b/include/ofi_peer.h new file mode 100644 index 00000000000..e8a8549efad --- /dev/null +++ b/include/ofi_peer.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2022 Intel Corporation, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _OFI_PEER_H_ +#define _OFI_PEER_H_ + +#include + +#endif /* _OFI_PEER_H_ */ diff --git a/prov/coll/src/coll.h b/prov/coll/src/coll.h index 1fcf6db1e25..bf554a79ccf 100644 --- a/prov/coll/src/coll.h +++ b/prov/coll/src/coll.h @@ -60,6 +60,8 @@ #include #include #include +#include +#include #define COLL_IOV_LIMIT 4 #define COLL_MR_MODES (OFI_MR_BASIC_MAP | FI_MR_LOCAL) @@ -81,11 +83,6 @@ struct coll_av { struct fid_peer_av *peer_av; }; -struct coll_cq { - struct util_cq util_cq; - struct fid_peer_cq *peer_cq; -}; - struct coll_eq { struct util_eq util_eq; struct fid_eq *peer_eq; @@ -101,6 +98,12 @@ struct coll_ep { */ struct fi_info *peer_info; struct fid_ep *peer_ep; + + /* + * local pointers to peer_cq - configured via bind() operation + */ + struct fid_peer_cq *tx_peer_cq; + struct fid_peer_cq *rx_peer_cq; }; struct coll_mr { diff --git a/prov/coll/src/coll_coll.c b/prov/coll/src/coll_coll.c index bd03e03d31e..90fe3bf5b0b 100644 --- a/prov/coll/src/coll_coll.c +++ b/prov/coll/src/coll_coll.c @@ -31,7 +31,6 @@ */ #include "coll.h" -#include "ofi_coll.h" static uint64_t coll_form_tag(uint32_t coll_id, uint32_t rank) { @@ -721,13 +720,12 @@ void coll_join_comp(struct util_coll_operation *coll_op) void coll_collective_comp(struct util_coll_operation *coll_op) { struct coll_ep *ep; - struct coll_cq *cq; - + int ret; ep = container_of(coll_op->ep, struct coll_ep, util_ep.ep_fid); - cq = container_of(ep->util_ep.tx_cq, struct coll_cq, util_cq); - if (cq->peer_cq->owner_ops->write(cq->peer_cq, coll_op->context, - FI_COLLECTIVE, 0, 0, 0, 0, 0)) + ret = ep->tx_peer_cq->owner_ops->write(ep->tx_peer_cq, coll_op->context, + FI_COLLECTIVE, 0, 0, 0, 0, 0); + if (ret) FI_WARN(ep->util_ep.domain->fabric->prov, FI_LOG_DOMAIN, "collective - cq write failed\n"); diff --git a/prov/coll/src/coll_cq.c b/prov/coll/src/coll_cq.c deleted file mode 100644 index 3c279b5113e..00000000000 --- a/prov/coll/src/coll_cq.c +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright (c) 2022 Intel Corporation. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "coll.h" - -static int coll_cq_close(struct fid *fid) -{ - struct coll_cq *cq; - int ret; - - cq = container_of(fid, struct coll_cq, util_cq.cq_fid.fid); - - ret = ofi_cq_cleanup(&cq->util_cq); - if (ret) - return ret; - - free(cq); - return 0; -} - -static struct fi_ops coll_cq_fi_ops = { - .size = sizeof(struct fi_ops), - .close = coll_cq_close, - .bind = fi_no_bind, - .control = fi_no_control, - .ops_open = fi_no_ops_open, -}; - -static struct fi_ops_cq coll_cq_ops = { - .size = sizeof(struct fi_ops_cq), - .read = fi_no_cq_read, - .readfrom = fi_no_cq_readfrom, - .readerr = fi_no_cq_readerr, - .sread = fi_no_cq_sread, - .sreadfrom = fi_no_cq_sreadfrom, - .signal = fi_no_cq_signal, - .strerror = fi_no_cq_strerror, -}; - -int coll_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr, - struct fid_cq **cq_fid, void *context) -{ - struct coll_cq *cq; - struct fi_peer_cq_context *peer_context = context; - int ret; - - if (!attr || !(attr->flags & FI_PEER)) { - FI_WARN(&coll_prov, FI_LOG_CORE, "FI_PEER flag required\n"); - return -EINVAL; - } - - if (!peer_context || peer_context->size < sizeof(*peer_context)) { - FI_WARN(&coll_prov, FI_LOG_CORE, "invalid peer CQ context\n"); - return -EINVAL; - } - - cq = calloc(1, sizeof(*cq)); - if (!cq) - return -FI_ENOMEM; - - cq->peer_cq = peer_context->cq; - - ret = ofi_cq_init(&coll_prov, domain, attr, &cq->util_cq, &ofi_cq_progress, - context); - if (ret) - goto err; - - *cq_fid = &cq->util_cq.cq_fid; - (*cq_fid)->fid.ops = &coll_cq_fi_ops; - (*cq_fid)->ops = &coll_cq_ops; - return 0; - -err: - free(cq); - return ret; -} diff --git a/prov/coll/src/coll_domain.c b/prov/coll/src/coll_domain.c index fbf1f2272a3..3df60a4eb47 100644 --- a/prov/coll/src/coll_domain.c +++ b/prov/coll/src/coll_domain.c @@ -35,7 +35,7 @@ static struct fi_ops_domain coll_domain_ops = { .size = sizeof(struct fi_ops_domain), .av_open = coll_av_open, - .cq_open = coll_cq_open, + .cq_open = fi_no_cq_open, .endpoint = coll_endpoint, .scalable_ep = fi_no_scalable_ep, .cntr_open = fi_no_cntr_open, diff --git a/prov/coll/src/coll_ep.c b/prov/coll/src/coll_ep.c index aa76c2711c4..988040c18e0 100644 --- a/prov/coll/src/coll_ep.c +++ b/prov/coll/src/coll_ep.c @@ -79,6 +79,30 @@ static struct fi_ops_collective coll_ops_collective = { .msg = fi_coll_no_msg, }; +static int +coll_ep_bind(struct fid *ep_fid, struct fid *bfid, uint64_t flags) +{ + struct coll_ep *ep; + struct fid_peer_cq *peer_cq; + + if (bfid->fclass != FI_CLASS_PEER_CQ) { + return ofi_ep_fid_bind(ep_fid, bfid, flags); + } + if (!(flags & (FI_TRANSMIT | FI_RECV))) { + return -FI_EINVAL; + } + + ep = container_of(ep_fid, struct coll_ep, util_ep.ep_fid.fid); + peer_cq = container_of(bfid, struct fid_peer_cq, fid); + + if (flags & FI_TRANSMIT) + ep->tx_peer_cq = peer_cq; + if (flags & FI_RECV) + ep->rx_peer_cq = peer_cq; + return FI_SUCCESS; +} + + static int coll_ep_close(struct fid *fid) { struct coll_ep *ep; @@ -103,7 +127,7 @@ static int coll_ep_ctrl(struct fid *fid, int command, void *arg) static struct fi_ops coll_ep_fi_ops = { .size = sizeof(struct fi_ops), .close = coll_ep_close, - .bind = ofi_ep_fid_bind, + .bind = coll_ep_bind, .control = coll_ep_ctrl, .ops_open = fi_no_ops_open, }; diff --git a/prov/efa/src/efa_prov_info.c b/prov/efa/src/efa_prov_info.c index 71897f54764..836fe5ca185 100644 --- a/prov/efa/src/efa_prov_info.c +++ b/prov/efa/src/efa_prov_info.c @@ -722,3 +722,21 @@ int efa_prov_info_compare_src_addr(const char *node, uint64_t flags, const struc return 0; } +/* + * @brief Compare the domain name specified via hints and match it with the + * domain name in prov_info + * + * @param info[in] info object + * @param hints[in] hints from user's call to fi_getinfo() + * + * return 1 - If the names are different + * 0 - No difference, names match. + */ +int efa_prov_info_compare_domain_name(const struct fi_info *hints, + const struct fi_info *info) +{ + if (hints && hints->domain_attr && hints->domain_attr->name) + return strcmp(info->domain_attr->name, hints->domain_attr->name); + + return 0; +} diff --git a/prov/efa/src/efa_prov_info.h b/prov/efa/src/efa_prov_info.h index b185529d2b7..e8aaae65d6e 100644 --- a/prov/efa/src/efa_prov_info.h +++ b/prov/efa/src/efa_prov_info.h @@ -45,4 +45,7 @@ int efa_prov_info_alloc_for_rxr(struct fi_info **prov_info_rxr, int efa_prov_info_compare_src_addr(const char *node, uint64_t flags, const struct fi_info *hints, const struct fi_info *fi); +int efa_prov_info_compare_domain_name(const struct fi_info *hints, + const struct fi_info *info); + #endif diff --git a/prov/efa/src/efa_user_info.c b/prov/efa/src/efa_user_info.c index 1214d9608da..e11bf446a92 100644 --- a/prov/efa/src/efa_user_info.c +++ b/prov/efa/src/efa_user_info.c @@ -218,6 +218,10 @@ int efa_user_info_get_dgram(uint32_t version, const char *node, const char *serv if (ret) continue; + ret = efa_prov_info_compare_domain_name(hints, prov_info_dgram); + if (ret) + continue; + EFA_INFO(FI_LOG_FABRIC, "found match for interface %s %s\n", node, prov_info_dgram->fabric_attr->name); if (hints) { @@ -446,6 +450,10 @@ int efa_user_info_get_rdm(uint32_t version, const char *node, if (ret) continue; + ret = efa_prov_info_compare_domain_name(hints, prov_info_rxr); + if (ret) + continue; + dupinfo = fi_dupinfo(prov_info_rxr); if (!dupinfo) { ret = -FI_ENOMEM; diff --git a/prov/efa/src/rxr/rxr_ep.c b/prov/efa/src/rxr/rxr_ep.c index 7ffad730b7c..a08a2beb6b1 100644 --- a/prov/efa/src/rxr/rxr_ep.c +++ b/prov/efa/src/rxr/rxr_ep.c @@ -957,8 +957,8 @@ static int rxr_ep_ctrl(struct fid *fid, int command, void *arg) ssize_t ret; struct rxr_ep *ep; struct efa_domain *efa_domain; - char shm_ep_name[EFA_SHM_NAME_MAX]; - size_t shm_ep_name_len; + char shm_ep_name[EFA_SHM_NAME_MAX], ep_addr_str[OFI_ADDRSTRLEN]; + size_t shm_ep_name_len, ep_addr_strlen; switch (command) { case FI_ENABLE: @@ -985,6 +985,11 @@ static int rxr_ep_ctrl(struct fid *fid, int command, void *arg) FI_DBG(&rxr_prov, FI_LOG_EP_CTRL, "core_addrlen = %ld\n", ep->core_addrlen); + ep_addr_strlen = sizeof(ep_addr_str); + rxr_ep_raw_addr_str(ep, ep_addr_str, &ep_addr_strlen); + FI_WARN(&rxr_prov, FI_LOG_EP_CTRL, "libfabric %s efa endpoint created! address: %s\n", + fi_tostr("1", FI_TYPE_VERSION), ep_addr_str); + /* Enable shm provider endpoint & post recv buff. * Once core ep enabled, 18 bytes efa_addr (16 bytes raw + 2 bytes qpn) is set. * We convert the address to 'gid_qpn' format, and set it as shm ep name, so diff --git a/prov/peer/src/peer.h b/prov/peer/src/peer.h new file mode 100644 index 00000000000..645360346d0 --- /dev/null +++ b/prov/peer/src/peer.h @@ -0,0 +1,51 @@ + +/* + * Copyright (c) 2022 Intel Corporation, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _PEER_H_ +#define _PEER_H_ + +#if HAVE_CONFIG_H +#include +#endif + +#include +#include +#include + +struct peer_cq { + struct util_cq util_cq; + struct fid_peer_cq *peer_cq; +}; + +#endif /* _PEER_H_ */ + diff --git a/prov/peer/src/peer_cq.c b/prov/peer/src/peer_cq.c new file mode 100644 index 00000000000..052361de85b --- /dev/null +++ b/prov/peer/src/peer_cq.c @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2022 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "peer.h" diff --git a/prov/rxm/src/rxm.h b/prov/rxm/src/rxm.h index 0fe48082a5f..a663cfb8f10 100644 --- a/prov/rxm/src/rxm.h +++ b/prov/rxm/src/rxm.h @@ -280,7 +280,6 @@ struct rxm_domain { struct rxm_cq { struct util_cq util_cq; struct fid_peer_cq peer_cq; - struct fid_cq *util_coll_cq; struct fid_cq *offload_coll_cq; }; diff --git a/prov/rxm/src/rxm_cq.c b/prov/rxm/src/rxm_cq.c index 24c4dcd354d..cbf742e3ea3 100644 --- a/prov/rxm/src/rxm_cq.c +++ b/prov/rxm/src/rxm_cq.c @@ -2071,9 +2071,6 @@ static int rxm_cq_close(struct fid *fid) if (rxm_cq->offload_coll_cq) fi_close(&rxm_cq->offload_coll_cq->fid); - if (rxm_cq->util_coll_cq) - fi_close(&rxm_cq->util_coll_cq->fid); - ret = ofi_cq_cleanup(&rxm_cq->util_cq); if (ret) retv = ret; @@ -2145,13 +2142,6 @@ int rxm_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr, rxm_cq->peer_cq.owner_ops = &rxm_cq_owner_ops; peer_cq_context.cq = &rxm_cq->peer_cq; - if (rxm_domain->util_coll_domain) { - ret = fi_cq_open(rxm_domain->util_coll_domain, &peer_cq_attr, - &rxm_cq->util_coll_cq, &peer_cq_context); - if (ret) - goto err2; - } - if (rxm_domain->offload_coll_domain) { ret = fi_cq_open(rxm_domain->offload_coll_domain, &peer_cq_attr, &rxm_cq->offload_coll_cq, &peer_cq_context); @@ -2166,8 +2156,6 @@ int rxm_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr, return 0; err2: - if (rxm_cq->util_coll_cq) - fi_close(&rxm_cq->util_coll_cq->fid); ofi_cq_cleanup(&rxm_cq->util_cq); err1: free(rxm_cq); diff --git a/prov/rxm/src/rxm_ep.c b/prov/rxm/src/rxm_ep.c index 7f60a124c15..b37b93e5ec5 100644 --- a/prov/rxm/src/rxm_ep.c +++ b/prov/rxm/src/rxm_ep.c @@ -1623,10 +1623,9 @@ static int rxm_ep_bind(struct fid *ep_fid, struct fid *bfid, uint64_t flags) case FI_CLASS_CQ: rxm_cq = container_of(bfid, struct rxm_cq, util_cq.cq_fid.fid); - if (rxm_ep->util_coll_ep && rxm_cq->util_coll_cq) { - ret = ofi_ep_fid_bind(&rxm_ep->util_coll_ep->fid, - &rxm_cq->util_coll_cq->fid, - flags); + if (rxm_ep->util_coll_ep) { + ret = fi_ep_bind(rxm_ep->util_coll_ep, + &rxm_cq->peer_cq.fid, flags); if (ret) retv = ret; }