From 710a2367220dc53a933eba2236cb56dfe2d82c12 Mon Sep 17 00:00:00 2001
From: Leonardo Di Giovanna
Date: Fri, 24 Jan 2025 11:23:06 +0100
Subject: [PATCH 1/2] feat(userspace/libscap): add API for getting single fd info

Signed-off-by: Leonardo Di Giovanna
---
 userspace/libscap/engine/gvisor/gvisor.cpp    |   1 +
 userspace/libscap/linux/scap_fds.c            | 155 +++++++++++-------
 userspace/libscap/linux/scap_linux_int.h      |  12 ++
 userspace/libscap/linux/scap_linux_platform.c |   1 +
 userspace/libscap/linux/scap_procs.c          |  25 +++
 userspace/libscap/scap_platform_api.c         |  12 ++
 userspace/libscap/scap_platform_api.h         |   8 +
 userspace/libscap/scap_platform_impl.h        |   4 +
 8 files changed, 165 insertions(+), 53 deletions(-)

diff --git a/userspace/libscap/engine/gvisor/gvisor.cpp b/userspace/libscap/engine/gvisor/gvisor.cpp
index f76bf7a023..bd625ae1ea 100644
--- a/userspace/libscap/engine/gvisor/gvisor.cpp
+++ b/userspace/libscap/engine/gvisor/gvisor.cpp
@@ -124,6 +124,7 @@ const scap_platform_vtable scap_gvisor_platform_vtable = {
 		.get_global_pid = NULL,
 		.get_threadlist = gvisor_get_threadlist,
 		.get_fdlist = NULL,
+		.get_fdinfo = NULL,
 
 		.close_platform = scap_gvisor_close_platform,
 		.free_platform = scap_gvisor_free_platform,
diff --git a/userspace/libscap/linux/scap_fds.c b/userspace/libscap/linux/scap_fds.c
index 2d7b7aaa69..f98ac3f32f 100644
--- a/userspace/libscap/linux/scap_fds.c
+++ b/userspace/libscap/linux/scap_fds.c
@@ -1156,6 +1156,50 @@ char *decode_st_mode(struct stat *sb) {
 		break;
 	}
 }
+
+// Set the fd info type (and inode, for anything but pipes and sockets) from the stat mode in
+// `sb`, then dispatch to the matching scap_fd_handle_* helper and return its result.
+static int32_t handle_file(struct scap_proclist *proclist,
+                           char *f_name,
+                           scap_threadinfo *tinfo,
+                           scap_fdinfo *fdi,
+                           char *procdir,
+                           struct stat const *const sb,
+                           uint64_t const net_ns,
+                           struct scap_ns_socket_list **sockets_by_ns,
+                           char *error) {
+	switch(sb->st_mode & S_IFMT) {
+	case S_IFIFO:
+		fdi->type = SCAP_FD_FIFO;
+		return scap_fd_handle_pipe(proclist, f_name, tinfo, fdi, error);
+	case S_IFREG:
+	case S_IFBLK:
+	case S_IFCHR:
+	case S_IFLNK:
+		fdi->type = SCAP_FD_FILE_V2;
+		fdi->ino = sb->st_ino;
+		return scap_fd_handle_regular_file(proclist, f_name, tinfo, fdi, procdir, error);
+	case S_IFDIR:
+		fdi->type = SCAP_FD_DIRECTORY;
+		fdi->ino = sb->st_ino;
+		return scap_fd_handle_regular_file(proclist, f_name, tinfo, fdi, procdir, error);
+	case S_IFSOCK:
+		fdi->type = SCAP_FD_UNKNOWN;
+		return scap_fd_handle_socket(proclist,
+		                             f_name,
+		                             tinfo,
+		                             fdi,
+		                             procdir,
+		                             net_ns,
+		                             sockets_by_ns,
+		                             error);
+	default:
+		fdi->type = SCAP_FD_UNSUPPORTED;
+		fdi->ino = sb->st_ino;
+		return scap_fd_handle_regular_file(proclist, f_name, tinfo, fdi, procdir, error);
+	}
+}
+
 //
 // Scan the directory containing the fd's of a proc /proc/x/fd
 //
@@ -1171,44 +1215,36 @@ int32_t scap_fd_scan_fd_dir(struct scap_linux_platform *linux_platform,
 	int32_t res = SCAP_SUCCESS;
 	char fd_dir_name[SCAP_MAX_PATH_SIZE];
 	char f_name[SCAP_MAX_PATH_SIZE];
-	char link_name[SCAP_MAX_PATH_SIZE];
 	struct stat sb;
 	uint64_t fd;
 	scap_fdinfo fdi = {};
 	uint64_t net_ns;
-	ssize_t r;
 	uint32_t fd_added = 0;
 
 	if(num_fds_ret != NULL) {
 		*num_fds_ret = 0;
 	}
 
-	snprintf(fd_dir_name, SCAP_MAX_PATH_SIZE, "%sfd", procdir);
+	snprintf(fd_dir_name, sizeof(fd_dir_name), "%sfd", procdir);
 	dir_p = opendir(fd_dir_name);
 	if(dir_p == NULL) {
 		snprintf(error, SCAP_LASTERR_SIZE, "error opening the directory %s", fd_dir_name);
 		return SCAP_NOTFOUND;
 	}
 
-	//
-	// Get the network namespace of the process
-	//
+	// Get the network namespace of the process.
 	snprintf(f_name, sizeof(f_name), "%sns/net", procdir);
-	r = readlink(f_name, link_name, sizeof(link_name) - 1);
-	if(r <= 0) {
-		//
-		// No network namespace available. Assume global
-		//
+	if(stat(f_name, &sb) == -1) {
+		// Assume default network namespace.
 		net_ns = 0;
 	} else {
-		link_name[r] = '\0';
-		sscanf(link_name, "net:[%" PRIi64 "]", &net_ns);
+		net_ns = sb.st_ino;
 	}
 
 	while((dir_entry_p = readdir(dir_p)) != NULL &&
 	      (linux_platform->m_fd_lookup_limit == 0 ||
 	       fd_added < linux_platform->m_fd_lookup_limit)) {
-		snprintf(f_name, SCAP_MAX_PATH_SIZE, "%s/%s", fd_dir_name, dir_entry_p->d_name);
+		snprintf(f_name, sizeof(f_name), "%s/%s", fd_dir_name, dir_entry_p->d_name);
 
 		if(-1 == stat(f_name, &sb) || 1 != sscanf(dir_entry_p->d_name, "%" PRIu64, &fd)) {
 			continue;
@@ -1216,52 +1252,25 @@ int32_t scap_fd_scan_fd_dir(struct scap_linux_platform *linux_platform,
 		fdi.fd = fd;
 
 		// In no driver mode to limit cpu usage we just parse sockets
-		// because we are interested only on them
+		// because we are interested only in them.
 		if(linux_platform->m_minimal_scan && !S_ISSOCK(sb.st_mode)) {
 			continue;
 		}
 
-		switch(sb.st_mode & S_IFMT) {
-		case S_IFIFO:
-			fdi.type = SCAP_FD_FIFO;
-			res = scap_fd_handle_pipe(proclist, f_name, tinfo, &fdi, error);
-			break;
-		case S_IFREG:
-		case S_IFBLK:
-		case S_IFCHR:
-		case S_IFLNK:
-			fdi.type = SCAP_FD_FILE_V2;
-			fdi.ino = sb.st_ino;
-			res = scap_fd_handle_regular_file(proclist, f_name, tinfo, &fdi, procdir, error);
-			break;
-		case S_IFDIR:
-			fdi.type = SCAP_FD_DIRECTORY;
-			fdi.ino = sb.st_ino;
-			res = scap_fd_handle_regular_file(proclist, f_name, tinfo, &fdi, procdir, error);
-			break;
-		case S_IFSOCK:
-			fdi.type = SCAP_FD_UNKNOWN;
-			res = scap_fd_handle_socket(proclist,
-			                            f_name,
-			                            tinfo,
-			                            &fdi,
-			                            procdir,
-			                            net_ns,
-			                            sockets_by_ns,
-			                            error);
-			break;
-		default:
-			fdi.type = SCAP_FD_UNSUPPORTED;
-			fdi.ino = sb.st_ino;
-			res = scap_fd_handle_regular_file(proclist, f_name, tinfo, &fdi, procdir, error);
+		res = handle_file(proclist,
+		                  f_name,
+		                  tinfo,
+		                  &fdi,
+		                  procdir,
+		                  &sb,
+		                  net_ns,
+		                  sockets_by_ns,
+		                  error);
+		if(res != SCAP_SUCCESS) {
 			break;
 		}
 
-		if(SCAP_SUCCESS != res) {
-			break;
-		} else {
-			++fd_added;
-		}
+		++fd_added;
 	}
 
 	closedir(dir_p);
@@ -1271,3 +1280,43 @@ int32_t scap_fd_scan_fd_dir(struct scap_linux_platform *linux_platform,
 
 	return res;
 }
+
+// Get the info of a single fd of a proc from /proc/x/fd/<fd>. Returns SCAP_NOTFOUND and sets
+// `error` if the fd entry cannot be stat-ed.
+int32_t scap_fd_get_fdinfo(struct scap_linux_platform const *const linux_platform,
+                           struct scap_proclist *proclist,
+                           char *procdir,
+                           scap_threadinfo *tinfo,
+                           int const fd,
+                           struct scap_ns_socket_list **sockets_by_ns,
+                           char *error) {
+	char f_name[SCAP_MAX_PATH_SIZE];
+	struct stat sb;
+	uint64_t net_ns;
+	scap_fdinfo fdi = {};
+
+	// Get the network namespace of the process.
+	snprintf(f_name, sizeof(f_name), "%sns/net", procdir);
+	if(stat(f_name, &sb) == -1) {
+		// Assume default network namespace.
+		net_ns = 0;
+	} else {
+		net_ns = sb.st_ino;
+	}
+
+	// Get file descriptor stat.
+	snprintf(f_name, sizeof(f_name), "%sfd/%d", procdir, fd);
+	if(stat(f_name, &sb) == -1) {
+		snprintf(error, SCAP_LASTERR_SIZE, "error getting info for file %s", f_name);
+		return SCAP_NOTFOUND;
+	}
+	fdi.fd = fd;
+
+	// In no driver mode to limit cpu usage we just parse sockets
+	// because we are interested only in them.
+	if(linux_platform->m_minimal_scan && !S_ISSOCK(sb.st_mode)) {
+		return SCAP_SUCCESS;
+	}
+
+	return handle_file(proclist, f_name, tinfo, &fdi, procdir, &sb, net_ns, sockets_by_ns, error);
+}
diff --git a/userspace/libscap/linux/scap_linux_int.h b/userspace/libscap/linux/scap_linux_int.h
index d0fe2897d3..e45f4401cf 100644
--- a/userspace/libscap/linux/scap_linux_int.h
+++ b/userspace/libscap/linux/scap_linux_int.h
@@ -65,6 +65,10 @@ int32_t scap_linux_get_threadlist(struct scap_platform* platform,
 int32_t scap_linux_get_fdlist(struct scap_platform* platform,
                               struct scap_threadinfo* tinfo,
                               char* lasterr);
+int32_t scap_linux_get_fdinfo(struct scap_platform* platform,
+                              struct scap_threadinfo* tinfo,
+                              int fd,
+                              char* lasterr);
 
 // read all sockets and add them to the socket table hashed by their ino
 int32_t scap_fd_read_sockets(char* procdir, struct scap_ns_socket_list* sockets, char* error);
@@ -77,3 +81,11 @@ int32_t scap_fd_scan_fd_dir(struct scap_linux_platform* linux_platform,
                             struct scap_ns_socket_list** sockets_by_ns,
                             uint64_t* num_fds_ret,
                             char* error);
+// read the info of a single file descriptor from the given process directory
+int32_t scap_fd_get_fdinfo(struct scap_linux_platform const* linux_platform,
+                           struct scap_proclist* proclist,
+                           char* procdir,
+                           scap_threadinfo* tinfo,
+                           int fd,
+                           struct scap_ns_socket_list** sockets_by_ns,
+                           char* error);
diff --git a/userspace/libscap/linux/scap_linux_platform.c b/userspace/libscap/linux/scap_linux_platform.c
index aeff697197..2ada6f1f03 100644
--- a/userspace/libscap/linux/scap_linux_platform.c
+++ b/userspace/libscap/linux/scap_linux_platform.c
@@ -117,6 +117,7 @@ static const struct scap_platform_vtable scap_linux_platform_vtable = {
 		.get_global_pid = scap_linux_getpid_global,
 		.get_threadlist = scap_linux_get_threadlist,
 		.get_fdlist = scap_linux_get_fdlist,
+		.get_fdinfo = scap_linux_get_fdinfo,
 		.close_platform = scap_linux_close_platform,
 		.free_platform = scap_linux_free_platform,
 };
diff --git a/userspace/libscap/linux/scap_procs.c b/userspace/libscap/linux/scap_procs.c
index 384184ce22..cc77622bb9 100644
--- a/userspace/libscap/linux/scap_procs.c
+++ b/userspace/libscap/linux/scap_procs.c
@@ -1328,3 +1328,28 @@ int32_t scap_linux_get_fdlist(struct scap_platform* platform,
 	}
 	return res;
 }
+
+int32_t scap_linux_get_fdinfo(struct scap_platform* platform,
+                              struct scap_threadinfo* tinfo,
+                              int const fd,
+                              char* lasterr) {
+	int res = SCAP_SUCCESS;
+	char proc_dir[SCAP_MAX_PATH_SIZE];
+	struct scap_ns_socket_list* sockets_by_ns = NULL;
+	struct scap_linux_platform* linux_platform = (struct scap_linux_platform*)platform;
+
+	// We get file descriptor info from the main thread
+	snprintf(proc_dir, sizeof(proc_dir), "%s/proc/%lu/", scap_get_host_root(), tinfo->pid);
+
+	res = scap_fd_get_fdinfo(linux_platform,
+	                         &platform->m_proclist,
+	                         proc_dir,
+	                         tinfo,
+	                         fd,
+	                         &sockets_by_ns,
+	                         lasterr);
+	if(sockets_by_ns != NULL && sockets_by_ns != (void*)-1) {
+		scap_fd_free_ns_sockets_list(&sockets_by_ns);
+	}
+	return res;
+}
diff --git a/userspace/libscap/scap_platform_api.c b/userspace/libscap/scap_platform_api.c
index 3853d6d9a0..b5c4f9fe84 100644
--- a/userspace/libscap/scap_platform_api.c
+++ b/userspace/libscap/scap_platform_api.c
@@ -149,3 +149,15 @@ int32_t scap_get_fdlist(struct scap_platform* platform,
 	snprintf(error, SCAP_LASTERR_SIZE, "operation not supported");
 	return SCAP_FAILURE;
 }
+
+int32_t scap_get_fdinfo(struct scap_platform* platform,
+                        struct scap_threadinfo* tinfo,
+                        int const fd,
+                        char* error) {
+	if(platform && platform->m_vtable->get_fdinfo) {
+		return platform->m_vtable->get_fdinfo(platform, tinfo, fd, error);
+	}
+
+	snprintf(error, SCAP_LASTERR_SIZE, "operation not supported");
+	return SCAP_FAILURE;
+}
diff --git a/userspace/libscap/scap_platform_api.h b/userspace/libscap/scap_platform_api.h
index ee88ceb572..b69919d408 100644
--- a/userspace/libscap/scap_platform_api.h
+++ b/userspace/libscap/scap_platform_api.h
@@ -113,6 +113,14 @@ struct ppm_proclist_info* scap_get_threadlist(struct scap_platform* platform, ch
 */
 int32_t scap_get_fdlist(struct scap_platform* platform, struct scap_threadinfo* tinfo, char* error);
 
+/*!
+  \brief Get the info for a single file descriptor of the given thread's process.
+*/
+int32_t scap_get_fdinfo(struct scap_platform* platform,
+                        struct scap_threadinfo* tinfo,
+                        int fd,
+                        char* error);
+
 #ifdef __cplusplus
 };
 #endif
diff --git a/userspace/libscap/scap_platform_impl.h b/userspace/libscap/scap_platform_impl.h
index e5929e5662..746ab81acb 100644
--- a/userspace/libscap/scap_platform_impl.h
+++ b/userspace/libscap/scap_platform_impl.h
@@ -72,6 +72,10 @@ struct scap_platform_vtable {
 	int32_t (*get_fdlist)(struct scap_platform* platform,
 	                      struct scap_threadinfo* tinfo,
 	                      char* lasterr);
+	int32_t (*get_fdinfo)(struct scap_platform* platform,
+	                      struct scap_threadinfo* tinfo,
+	                      int fd,
+	                      char* lasterr);
 
 	// close the platform structure
 	// clean up all data, make it ready for another call to `init_platform`

From 3b34a3a6f7ff18f1d3d202b3b457bbb76ef91a7d Mon Sep 17 00:00:00 2001
From: Leonardo Di Giovanna
Date: Fri, 24 Jan 2025 11:52:07 +0100
Subject: [PATCH 2/2] feat(userspace/libsinsp): improve recvmsg SCM_RIGHTS cmsg handling

Parse all control messages instead of parsing just the first one.
Leverage the new scap_get_fdinfo API to get info only from the file
in procfs associated to the file descriptor, instead of scanning
each time the entire procfs fd directory.

Signed-off-by: Leonardo Di Giovanna
Co-authored-by: Roberto Scolaro
---
 userspace/libsinsp/parsers.cpp | 136 ++++++++++++++++++++++++---------
 userspace/libsinsp/parsers.h   |  10 +++
 2 files changed, 112 insertions(+), 34 deletions(-)

diff --git a/userspace/libsinsp/parsers.cpp b/userspace/libsinsp/parsers.cpp
index f6212bcc04..eea8cc1b4a 100644
--- a/userspace/libsinsp/parsers.cpp
+++ b/userspace/libsinsp/parsers.cpp
@@ -3662,6 +3662,107 @@ void sinsp_parser::parse_fspath_related_exit(sinsp_evt *evt) {
 	}
 }
 
+#ifndef _WIN32
+// ppm_cmsghdr is a mirror of the POSIX cmsghdr structure. The fundamental assumption when working
+// with it is that actual control message variable-size data follows this (padding-aligned) header.
+struct ppm_cmsghdr {
+	// Length of ppm_cmsghdr structure plus data following it.
+	size_t cmsg_len;
+	// Originating protocol.
+	int cmsg_level;
+	// Protocol specific type.
+	int cmsg_type;
+};
+
+// PPM_CMSG_* macros definitions. Their purpose is to manipulate ppm_cmsghdr structure and fields.
+// Majority of them are equivalent to the corresponding variants without PPM_* prefix, but they
+// don't depend on msghdr definition (as we don't need it at the moment).
+#define PPM_CMSG_FIRSTHDR(msg_control, msg_controllen) \
+	((size_t)msg_controllen >= sizeof(ppm_cmsghdr) ? (ppm_cmsghdr *)msg_control : (ppm_cmsghdr *)0)
+
+#define PPM_CMSG_UNALIGNED_READ(cmsg, field, dest)           \
+	(memcpy((void *)&(dest),                                 \
+	        ((char *)(cmsg)) + offsetof(ppm_cmsghdr, field), \
+	        sizeof((cmsg)->field)))
+
+#define PPM_CMSG_ALIGN(len) (((len) + sizeof(size_t) - 1) & (size_t) ~(sizeof(size_t) - 1))
+
+#define PPM_CMSG_NXTHDR(msg_control, msg_controllen, cmsg) \
+	ppm_cmsg_nxthdr(msg_control, msg_controllen, cmsg)
+// Return the control message header following `cmsg`, or nullptr if `cmsg` declares a length
+// smaller than the header size or the next header would not fit in the control buffer.
+static ppm_cmsghdr *ppm_cmsg_nxthdr(char const *msg_control,
+                                    size_t const msg_controllen,
+                                    ppm_cmsghdr *cmsg) {
+	size_t cmsg_len;
+	PPM_CMSG_UNALIGNED_READ(cmsg, cmsg_len, cmsg_len);
+	if(cmsg_len < sizeof(ppm_cmsghdr)) {
+		return nullptr;
+	}
+
+	size_t const cmsg_aligned_len = PPM_CMSG_ALIGN(cmsg_len);
+	cmsg = reinterpret_cast<ppm_cmsghdr *>(reinterpret_cast<char *>(cmsg) + cmsg_aligned_len);
+	if(reinterpret_cast<char *>(cmsg + 1) > msg_control + msg_controllen ||
+	   reinterpret_cast<char *>(cmsg) + cmsg_aligned_len > msg_control + msg_controllen) {
+		return nullptr;
+	}
+	return cmsg;
+}
+
+#define PPM_CMSG_DATA(cmsg) ((char *)((ppm_cmsghdr *)(cmsg) + 1))
+
+inline void sinsp_parser::process_recvmsg_ancillary_data_fds(int const *fds,
+                                                             size_t const fds_len,
+                                                             scap_threadinfo *scap_tinfo,
+                                                             char *error) const {
+	for(size_t i = 0; i < fds_len; i++) {
+		if(scap_get_fdinfo(m_inspector->get_scap_platform(), scap_tinfo, fds[i], error) !=
+		   SCAP_SUCCESS) {
+			libsinsp_logger()->format(
+					sinsp_logger::SEV_DEBUG,
+					"scap_get_fdinfo failed: %s, proc table will not be updated with new fd.",
+					error);
+		}
+	}
+}
+
+inline void sinsp_parser::process_recvmsg_ancillary_data(sinsp_evt *evt,
+                                                         sinsp_evt_param const *parinfo) const {
+	// Seek for SCM_RIGHTS control message headers and extract passed file descriptors.
+	char const *msg_ctrl = parinfo->m_val;
+	size_t const msg_ctrllen = parinfo->m_len;
+	for(ppm_cmsghdr *cmsg = PPM_CMSG_FIRSTHDR(msg_ctrl, msg_ctrllen); cmsg != nullptr;
+	    cmsg = PPM_CMSG_NXTHDR(msg_ctrl, msg_ctrllen, cmsg)) {
+		int cmsg_type;
+		PPM_CMSG_UNALIGNED_READ(cmsg, cmsg_type, cmsg_type);
+		if(cmsg_type != SCM_RIGHTS) {
+			continue;
+		}
+		// Found SCM_RIGHTS control message. Process it.
+		char error[SCAP_LASTERR_SIZE] = {};
+		scap_threadinfo scap_tinfo{};
+		memset(&scap_tinfo, 0, sizeof(scap_tinfo));
+		m_inspector->m_thread_manager->thread_to_scap(*evt->get_tinfo(), &scap_tinfo);
+#define SCM_MAX_FD 253  // Taken from kernel.
+		int fds[SCM_MAX_FD];
+		size_t cmsg_len;
+		PPM_CMSG_UNALIGNED_READ(cmsg, cmsg_len, cmsg_len);
+		unsigned long const data_size = cmsg_len - CMSG_LEN(0);
+		unsigned long const fds_len = data_size / sizeof(int);
+		// Guard against malformed or truncated event, by checking that data size is a multiple
+		// of sizeof(int) (file descriptor size), the control message doesn't contain more data
+		// than allowed by kernel constraints, and the data lies entirely within the parameter.
+		if(data_size % sizeof(int) || fds_len > SCM_MAX_FD ||
+		   data_size > msg_ctrllen - static_cast<size_t>(PPM_CMSG_DATA(cmsg) - msg_ctrl)) {
+			continue;
+		}
+#undef SCM_MAX_FD
+		memcpy(&fds, PPM_CMSG_DATA(cmsg), data_size);
+		process_recvmsg_ancillary_data_fds(fds, fds_len, &scap_tinfo, error);
+	}
+}
+#endif  // _WIN32
+
 void sinsp_parser::parse_rw_exit(sinsp_evt *evt) {
 	const sinsp_evt_param *parinfo;
 	int64_t retval;
@@ -3792,40 +3893,7 @@ void sinsp_parser::parse_rw_exit(sinsp_evt *evt) {
 
 			if(cmparam != -1) {
 				parinfo = evt->get_param(cmparam);
-				if(parinfo->m_len > sizeof(cmsghdr)) {
-					cmsghdr cmsg;
-					memcpy(&cmsg, parinfo->m_val, sizeof(cmsghdr));
-					if(cmsg.cmsg_type == SCM_RIGHTS) {
-						char error[SCAP_LASTERR_SIZE];
-						scap_threadinfo scap_tinfo{};
-
-						memset(&scap_tinfo, 0, sizeof(scap_tinfo));
-
-						m_inspector->m_thread_manager->thread_to_scap(*evt->get_tinfo(),
-						                                              &scap_tinfo);
-
-						// Store current fd; it might get changed by scap_get_fdlist below.
-						int64_t fd = -1;
-						if(evt->get_fd_info()) {
-							fd = evt->get_fd_info()->m_fd;
-						}
-
-						// Get the new fds. The callbacks we have registered populate the fd table
-						// with the new file descriptors.
-						if(scap_get_fdlist(m_inspector->get_scap_platform(), &scap_tinfo, error) !=
-						   SCAP_SUCCESS) {
-							libsinsp_logger()->format(sinsp_logger::SEV_DEBUG,
-							                          "scap_get_fdlist failed: %s, proc table will "
-							                          "not be updated with new fds.",
-							                          error);
-						}
-
-						// Force refresh event fdinfo
-						if(fd != -1) {
-							evt->set_fd_info(evt->get_tinfo()->get_fd(fd));
-						}
-					}
-				}
+				process_recvmsg_ancillary_data(evt, parinfo);
 			}
 #endif
 
diff --git a/userspace/libsinsp/parsers.h b/userspace/libsinsp/parsers.h
index 364aad5e9f..ae2b80e903 100644
--- a/userspace/libsinsp/parsers.h
+++ b/userspace/libsinsp/parsers.h
@@ -134,6 +134,16 @@ class sinsp_parser {
 	inline void add_pipe(sinsp_evt* evt, int64_t fd, uint64_t ino, uint32_t openflags);
 	// Return false if the update didn't happen (for example because the tuple is NULL)
 	bool update_fd(sinsp_evt* evt, const sinsp_evt_param* parinfo);
+#ifndef _WIN32
+	// Process file descriptors extracted from recvmsg ancillary data.
+	inline void process_recvmsg_ancillary_data_fds(int const* fds,
+	                                               size_t fds_len,
+	                                               scap_threadinfo* scap_tinfo,
+	                                               char* error) const;
+	// Process recvmsg ancillary data.
+	inline void process_recvmsg_ancillary_data(sinsp_evt* evt,
+	                                           sinsp_evt_param const* parinfo) const;
+#endif
 
 	// Next 4 return false if the update didn't happen because the tuple is identical to the given
 	// address