From 9d4692dc2d1d19f7d6f94bf666fbafa8aa575b33 Mon Sep 17 00:00:00 2001 From: Jianxin Xiong Date: Sun, 17 Mar 2024 20:01:14 -0700 Subject: [PATCH] prov/shm: Close device_fds for connected peers when the EP is closed This fixes a file descriptor leak which is not obvious for regular applications. For fi_ubertest, however, each sub-test goes through a setup-teardown process, so the file descriptors can quickly run out due to this leak. Cherry-picked from commit b555e518d4fc747db2369edea386fecc177a6ee1 Signed-off-by: Jianxin Xiong --- prov/shm/src/smr_ep.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/prov/shm/src/smr_ep.c b/prov/shm/src/smr_ep.c index c381cd029c2..18c3d1fd9dd 100644 --- a/prov/shm/src/smr_ep.c +++ b/prov/shm/src/smr_ep.c @@ -811,6 +811,21 @@ static void smr_cleanup_epoll(struct smr_sock_info *sock_info) ofi_epoll_close(sock_info->epollfd); } +static void smr_free_sock_info(struct smr_ep *ep) +{ + int i, j; + + for (i = 0; i < SMR_MAX_PEERS; i++) { + if (!ep->sock_info->peers[i].device_fds) + continue; + for (j = 0; j < ep->sock_info->nfds; j++) + close(ep->sock_info->peers[i].device_fds[j]); + free(ep->sock_info->peers[i].device_fds); + } + free(ep->sock_info); + ep->sock_info = NULL; +} + static int smr_ep_close(struct fid *fid) { struct smr_ep *ep; @@ -826,7 +841,7 @@ static int smr_ep_close(struct fid *fid) close(ep->sock_info->listen_sock); unlink(ep->sock_info->name); smr_cleanup_epoll(ep->sock_info); - free(ep->sock_info); + smr_free_sock_info(ep); } if (ep->srx && ep->util_ep.ep_fid.msg != &smr_no_recv_msg_ops) @@ -1173,19 +1188,6 @@ void smr_ep_exchange_fds(struct smr_ep *ep, int64_t id) SMR_CMAP_FAILED : SMR_CMAP_SUCCESS; } -static void smr_free_sock_info(struct smr_ep *ep) -{ - int i, j; - - for (i = 0; i < SMR_MAX_PEERS; i++) { - for (j = 0; j < ep->sock_info->nfds; j++) - close(ep->sock_info->peers[i].device_fds[j]); - free(ep->sock_info->peers[i].device_fds); - } - free(ep->sock_info); - 
ep->sock_info = NULL; -} - static void smr_init_ipc_socket(struct smr_ep *ep) { struct smr_sock_name *sock_name;