From 4f32dc039164c080fb94e6ce86b1dd892e2e2666 Mon Sep 17 00:00:00 2001 From: "B. Scott Michel" Date: Fri, 27 Dec 2024 13:40:11 -0800 Subject: [PATCH] ETHER: Don't spin in the libslirp reader thread Avoid spinning in the reader thread executing sim_slirp_select() when libslirp isn't managing any sockets. Waits on a condition variable until libslirp registers a socket via register_poll_socket(), at which point, the condition variable is signaled. - sim_atomic.h: Add atomic value primitives to ensure value synchronization across threads. Required for ARM-based platforms, useful on Intel. At the moment, these primitives are used by NAT(libslirp). However, there are many other places in SIMH where they would be useful if used. Does not (yet) replace the AIO intrinsics or synchronization code in sim_defs.h. - sim_slirp: Add no_sockets_cv condvar, no_sockets_lock mutex. If libslirp hasn't registered any sockets (which includes the TCP and UDP redirections), sim_slirp_select() waits on the condvar until awakened when register_poll_socket() encounters its first socket. Simulators that don't use AIO, but end up calling sim_slirp_select(): Sleep for 5 ms and return 0. It's a punt. - sim_ether: Close and cleanup the underlying Ethernet device before cleaning up the other reader thread synchronization primitives. Otherwise, the NAT(libslirp) reader thread will not exit. --- sim_atomic.h | 273 ++++++++++++++++++++++++++++++++++ sim_ether.c | 7 +- sim_slirp/sim_slirp.c | 23 ++- sim_slirp/sim_slirp_network.h | 9 +- sim_slirp/slirp_poll.c | 88 ++++++++--- 5 files changed, 371 insertions(+), 29 deletions(-) create mode 100644 sim_atomic.h diff --git a/sim_atomic.h b/sim_atomic.h new file mode 100644 index 000000000..28388ad2d --- /dev/null +++ b/sim_atomic.h @@ -0,0 +1,273 @@ +/* Atomic get/put/add/sub/inc/dec support. Fashioned after the SDL2 approach to + * atomic variables. Uses compiler and platform intrinsics whenever possible, + * falling back to a mutex-based approach when necessary. + * + * Supported compilers: GCC and Clang across all platforms (including Windows, + * if compiling within a MinGW environment.) + * + * Platforms: Windows with MSVC. + * + * Types: + * + * sim_atomic_value_t: The wrapper structure that encapsulates the atomic + * value. Normally, if using GCC, Clang or MSVC, the value is managed via the + * appropriate intrinsics. The fallback is a pthread mutex. + * + * sim_atomic_type_t: The underlying type inside the sim_atomic_value_t wrapper. + * This is a long for GCC and Clang, LONG for Windows. + * + * Clang's atomic intrinsics are slightly different as compared to GCC: For add, + * subtract operations, Clang's atomics fetch and return the old value. So, + * Clang takes an extra atomic load to fetch the new value. + * + * sim_atomic_init(sim_atomic_value_t *): Initialize an atomic wrapper. The + * value is set to 0 and the mutex is initialized (when necessary.) + * + * sim_atomic_destroy(sim_atomic_value_t *p): The inverse of sim_atomic_init(). + * The value is set to -1. When necessary, the mutex is destroyed. + * + * sim_atomic_get(sim_atomic_value_t *p): Atomically returns the + * sim_atomic_type_t value in the wrapper. + * + * sim_atomic_put(sim_atomic_value_t *p, sim_atomic_type_t newval): Atomically + * stores a new value in the wrapper. + * + * sim_atomic_add, sim_atomic_sub(sim_atomic_value_t *p, sim_atomic_type_t x): + * Atomically add or subtract a quantity to or from the wrapper's value. + * + * sim_atomic_inc, sim_atomic_dec(sim_atomic_value_t *p): Atomically increment or + * decrement the wrapper's value. + */ + +#if !defined(SIM_ATOMIC_H) + +#include + +/* TODO: defined(__DECC_VER) && defined(_IA64) -- DEC C on Itanium looks like it + * might be the same as Windows' interlocked API. Contribtion/correction needed. */ + +/* NON-FEATURE: Older GCC __sync_* primitives. These have been deprecated for a long + * time. Explicitly not supported. */ + +#if (defined(_WIN32) || defined(_WIN64)) || \ + (defined(__ATOMIC_ACQ_REL) && defined(__ATOMIC_SEQ_CST) && defined(__ATOMIC_ACQUIRE)) +/* Atomic operations available! */ +#define HAVE_ATOMIC_PRIMS 1 +#else +#define HAVE_ATOMIC_PRIMS 0 +#endif + +/*~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~= + * Value type and wrapper for atomics: + *~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=*/ + +#if !defined(_WIN32) && !defined(_WIN64) +typedef long sim_atomic_type_t; +#else +typedef LONG sim_atomic_type_t; +#endif + +typedef struct { +#if !HAVE_ATOMIC_PRIMS + /* If the compiler doesn't support atomic intrinsics, the backup plan is + * a mutex. */ + pthread_mutex_t value_lock; +#endif + + volatile sim_atomic_type_t value; +} sim_atomic_value_t; + +/*~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~= + * Initialization, destruction: + *~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=*/ + +static SIM_INLINE void sim_atomic_init(sim_atomic_value_t *p) +{ +#if !HAVE_ATOMIC_PRIMS + pthread_mutex_init(&p->value_lock, NULL); +#endif + p-> value = 0; +} + +static SIM_INLINE void sim_atomic_destroy(sim_atomic_value_t *p) +{ +#if !HAVE_ATOMIC_PRIMS + pthread_mutex_destroy(&p->value_lock); +#endif + p-> value = -1; +} + +/*~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~= + * Primitives: + *~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=*/ + +static SIM_INLINE sim_atomic_type_t sim_atomic_get(sim_atomic_value_t *p) +{ + sim_atomic_type_t retval; + +#if HAVE_ATOMIC_PRIMS +# if defined(__ATOMIC_ACQUIRE) && (defined(__GNUC__) || defined(__clang__)) + __atomic_load(&p->value, &retval, __ATOMIC_ACQUIRE); +# elif defined(_WIN32) || defined(_WIN64) +# if defined(_M_IX86) || defined(_M_X64) + /* Intel Total Store Ordering optimization. */ + retval = p->value; +# else + retval = InterlockedOr(&p->value, 0); +# endif +# else +# error "sim_atomic_get: No intrinsic?" + retval = -1; +# endif +#else + pthread_mutex_lock(&p->value_lock); + retval = p->value; + pthread_mutex_unlock(&p->value_lock); +#endif + + return retval; +} + +static SIM_INLINE void sim_atomic_put(sim_atomic_value_t *p, sim_atomic_type_t newval) +{ +#if HAVE_ATOMIC_PRIMS +# if defined(__ATOMIC_SEQ_CST) && (defined(__GNUC__) || defined(__clang__)) + /* Clang doesn't like __ATOMIC_ACQ_REL for atomic stores. And for good reason: sequential + * consistency is the appropriate memory ordering for storing values. */ + __atomic_store(&p->value, &newval, __ATOMIC_SEQ_CST); +# elif defined(_WIN32) || defined(_WIN64) +# if defined(_M_IX86) || defined(_M_X64) + /* Intel Total Store Ordering optimization. */ + p->value = newval; +# else + sim_atomic_type_t retval, cmp; + + do { + cmp = sim_atomic_int_get(p); + retval = InterlockedCompareExchange(&p->value, newval, cmp); + } while (retval != cmp); +# endif +# else +# error "sim_atomic_put: No intrinsic?" +# endif +#else + pthread_mutex_lock(&p->value_lock); + p->value = newval; + pthread_mutex_unlock(&p->value_lock); +#endif +} + +static SIM_INLINE sim_atomic_type_t sim_atomic_add(sim_atomic_value_t *p, sim_atomic_type_t x) +{ + sim_atomic_type_t retval; + +#if HAVE_ATOMIC_PRIMS +# if defined(__ATOMIC_ACQ_REL) +# if defined(__GNUC__) + retval = __atomic_add_fetch(&p->value, x, __ATOMIC_ACQ_REL); +# elif defined(__clang__) +# if __LONG_WIDTH__ == 32 + (void) __atomic_fetch_add_4(&p->value, x, __ATOMIC_ACQ_REL); +# else if __LONG_WIDTH__ == 64 + (void) __atomic_fetch_add_8(&p->value, x, __ATOMIC_ACQ_REL); +# endif + /* Return the updated value. */ + retval = sim_atomic_get(p); +# else +# error "sim_atomic_add: No __ATOMIC_ACQ_REL intrinsic?" +# endif +# elif defined(_WIN32) || defined(_WIN64) + retval = InterlockedAdd(&p->value, x); +# else +# error "sim_atomic_add: No intrinsic?" +# endif +#else + pthread_mutex_lock(&p->value_lock); + p->value += newval; + retval = p->value; + pthread_mutex_unlock(&p->value_lock); +#endif + + return retval; +} + +static SIM_INLINE sim_atomic_type_t sim_atomic_sub(sim_atomic_value_t *p, sim_atomic_type_t x) +{ + sim_atomic_type_t retval; + +#if HAVE_ATOMIC_PRIMS +# if defined(__ATOMIC_ACQ_REL) +# if defined(__GNUC__) + retval = __atomic_sub_fetch(&p->value, x, __ATOMIC_ACQ_REL); +# elif defined(__clang__) +# if __LONG_WIDTH__ == 32 + __atomic_fetch_sub_4(&p->value, x, __ATOMIC_ACQ_REL); +# else if __LONG_WIDTH__ == 64 + __atomic_fetch_sub_8(&p->value, x, __ATOMIC_ACQ_REL); +# endif + /* Return the updated value. */ + retval = sim_atomic_get(p); +# else +# error "sim_atomic_sub: No __ATOMIC_ACQ_REL intrinsic?" +# endif +# elif defined(_WIN32) || defined(_WIN64) + /* No InterlockedSub() -- use the math identity. */ + retval = sim_atomic_add(p, -x); +# else +# error "sim_atomic_sub: No intrinsic?" +# endif +#else + pthread_mutex_lock(&p->value_lock); + p->value -= newval; + retval = p->value; + pthread_mutex_unlock(&p->value_lock); +#endif + + return retval; +} + +static SIM_INLINE sim_atomic_type_t sim_atomic_inc(sim_atomic_value_t *p) +{ + sim_atomic_type_t retval; + +#if HAVE_ATOMIC_PRIMS +# if !defined(_WIN32) && !defined(_WIN64) + retval = sim_atomic_add(p, 1); +# elif defined(_WIN32) || defined(_WIN64) + retval = InterlockedIncrement(&p->value); +# else +# error "sim_atomic_inc: No intrinsic?" +# endif +#else + pthread_mutex_lock(&p->value_lock); + retval = ++p->value; + pthread_mutex_unlock(&p->value_lock); +#endif + + return retval; +} + +static SIM_INLINE sim_atomic_type_t sim_atomic_dec(sim_atomic_value_t *p) +{ + sim_atomic_type_t retval; + +#if HAVE_ATOMIC_PRIMS +# if !defined(_WIN32) && !defined(_WIN64) + retval = sim_atomic_sub(p, 1); +# elif defined(_WIN32) || defined(_WIN64) + retval = InterlockedDecrement(&p->value); +# else +# error "sim_atomic_dec: No intrinsic?" +# endif +#else + pthread_mutex_lock(&p->value_lock); + retval = --p->value; + pthread_mutex_unlock(&p->value_lock); +#endif + + return retval; +} + + +#define SIM_ATOMIC_H +#endif diff --git a/sim_ether.c b/sim_ether.c index 3a6f2abf2..dc51d7947 100644 --- a/sim_ether.c +++ b/sim_ether.c @@ -2674,6 +2674,11 @@ dev->fd_handle = 0; dev->have_host_nic_phy_addr = 0; #if defined (USE_READER_THREAD) +/* Close the ethernet device first. */ +_eth_close_port (dev->eth_api, pcap, pcap_fd); + +/* Then continue to clean up the sync primitives that are no longer + * needed. */ pthread_join (dev->reader_thread, NULL); pthread_mutex_destroy (&dev->lock); pthread_cond_signal (&dev->writer_cond); @@ -2681,6 +2686,7 @@ pthread_join (dev->writer_thread, NULL); pthread_mutex_destroy (&dev->self_lock); pthread_mutex_destroy (&dev->writer_lock); pthread_cond_destroy (&dev->writer_cond); + if (1) { ETH_WRITE_REQUEST *buffer; while (NULL != (buffer = dev->write_buffers)) { @@ -2695,7 +2701,6 @@ if (1) { ethq_destroy (&dev->read_queue); /* release FIFO queue */ #endif -_eth_close_port (dev->eth_api, pcap, pcap_fd); sim_messagef (SCPE_OK, "Eth: closed %s\n", dev->name); /* clean up the mess */ diff --git a/sim_slirp/sim_slirp.c b/sim_slirp/sim_slirp.c index 54b30d222..0e2f4a01b 100644 --- a/sim_slirp/sim_slirp.c +++ b/sim_slirp/sim_slirp.c @@ -287,7 +287,12 @@ SimSlirpNetwork *sim_slirp_open(const char *args, void *pkt_opaque, packet_callb /* SIMH state/config */ slirp->args = (char *) calloc(1 + strlen(args), sizeof(char)); strcpy(slirp->args, args); - pthread_mutex_init(&slirp->libslirp_access, NULL); +#if defined(USE_READER_THREAD) + pthread_mutex_init(&slirp->libslirp_lock, NULL); + pthread_cond_init(&slirp->no_sockets_cv, NULL); + pthread_mutex_init(&slirp->no_sockets_lock, NULL); + sim_atomic_init(&slirp->n_sockets); +#endif slirp->original_debflags = dptr->debflags; dptr->debflags = sim_combine_debtabs(dptr->debflags, slirp_dbgtable); @@ -524,7 +529,17 @@ void sim_slirp_close(SimSlirpNetwork *slirp) slirp->fds = NULL; #endif - pthread_mutex_destroy(&slirp->libslirp_access); +#if defined(USE_READER_THREAD) + sim_atomic_put(&slirp->n_sockets, -1); + pthread_cond_broadcast(&slirp->no_sockets_cv); + + pthread_mutex_destroy(&slirp->libslirp_lock); + sim_atomic_destroy(&slirp->n_sockets); + pthread_cond_destroy(&slirp->no_sockets_cv); + pthread_mutex_destroy(&slirp->no_sockets_lock); + sim_atomic_destroy(&slirp->n_sockets); +#endif + free(slirp); } @@ -690,9 +705,9 @@ int sim_slirp_send(SimSlirpNetwork *slirp, const char *msg, size_t len, int flag GLIB_UNUSED_PARAM(flags); /* Just send the packet up to libslirp. */ - pthread_mutex_lock(&slirp->libslirp_access); + pthread_mutex_lock(&slirp->libslirp_lock); slirp_input(slirp->slirp_cxn, (const uint8_t *) msg, (int) len); - pthread_mutex_unlock(&slirp->libslirp_access); + pthread_mutex_unlock(&slirp->libslirp_lock); return (int) len; } diff --git a/sim_slirp/sim_slirp_network.h b/sim_slirp/sim_slirp_network.h index f2636fb1b..2a342c9a3 100644 --- a/sim_slirp/sim_slirp_network.h +++ b/sim_slirp/sim_slirp_network.h @@ -7,6 +7,7 @@ #include "sim_sock.h" #include "sim_slirp.h" +#include "sim_atomic.h" #include "libslirp.h" #if !defined(SLIRP_NETWORK_STATE_H) @@ -63,9 +64,15 @@ struct sim_slirp { #if defined(USE_READER_THREAD) /* Access lock to libslirp. libslirp is not threaded or protected. */ - pthread_mutex_t libslirp_access; + pthread_mutex_t libslirp_lock; + + /* Condvar, mutex when there are no sockets to poll or select. */ + pthread_cond_t no_sockets_cv; + pthread_mutex_t no_sockets_lock; #endif + sim_atomic_value_t n_sockets; + /* DNS search domains (argument copy) */ char *dns_search; char **dns_search_domains; diff --git a/sim_slirp/slirp_poll.c b/sim_slirp/slirp_poll.c index 46cc18bfb..17b21bce8 100644 --- a/sim_slirp/slirp_poll.c +++ b/sim_slirp/slirp_poll.c @@ -106,37 +106,59 @@ int sim_slirp_select(SimSlirpNetwork *slirp, int ms_timeout) int retval = 0; uint32 slirp_timeout = ms_timeout; struct timeval tv; + sim_atomic_type_t n_sockets; if (slirp == NULL) /* Will cause the reader thread to exit! */ return -1; - /* Note: It's a generally good practice to acquire a mutex and hold it until an operation - * completes. In this case, though, poll()/select() will block and prevent outbound writes - * via sim_slirp_send(), which itself blocks waiting for the mutex. - * - * Hold the mutex only when calling libslirp functions, reacquire when needed. - */ - pthread_mutex_lock(&slirp->libslirp_access); + n_sockets = sim_atomic_get(&slirp->n_sockets); + if (n_sockets > 0) { + /* Note: It's a generally good practice to acquire a mutex and hold it until an operation + * completes. In this case, though, poll()/select() will block and prevent outbound writes + * via sim_slirp_send(), which itself blocks waiting for the mutex. + * + * Hold the mutex only when calling libslirp functions, reacquire when needed. + */ + pthread_mutex_lock(&slirp->libslirp_lock); - /* Check on expiring libslirp timers. */ - simh_timer_check(slirp->slirp_cxn); + /* Check on expiring libslirp timers. */ + simh_timer_check(slirp->slirp_cxn); - /* Ask libslirp to poll for I/O events. */ - initialize_poll(slirp, slirp_timeout, &tv); - slirp_pollfds_fill_socket(slirp->slirp_cxn, &slirp_timeout, add_poll_callback, slirp); + /* Ask libslirp to poll for I/O events. */ + initialize_poll(slirp, slirp_timeout, &tv); + slirp_pollfds_fill_socket(slirp->slirp_cxn, &slirp_timeout, add_poll_callback, slirp); - pthread_mutex_unlock(&slirp->libslirp_access); + pthread_mutex_unlock(&slirp->libslirp_lock); - retval = do_poll(slirp, ms_timeout, &tv); + retval = do_poll(slirp, ms_timeout, &tv); - if (retval > 0) { - /* The libslirp idiom invokes slirp_pollfds_poll within the same function as slirp_pollfds_fill(). */ - pthread_mutex_lock(&slirp->libslirp_access); - slirp_pollfds_poll(slirp->slirp_cxn, 0, get_events_callback, slirp); - pthread_mutex_unlock(&slirp->libslirp_access); - } else if (retval < 0) { - report_error(slirp); + if (retval > 0) { + /* The libslirp idiom invokes slirp_pollfds_poll within the same function as slirp_pollfds_fill(). */ + pthread_mutex_lock(&slirp->libslirp_lock); + slirp_pollfds_poll(slirp->slirp_cxn, 0, get_events_callback, slirp); + pthread_mutex_unlock(&slirp->libslirp_lock); + } else if (retval < 0) { + report_error(slirp); + } + } else if (n_sockets == 0) { +#if defined(USE_READER_THREAD) + /* Block on the condvar until there's at least one socket registered by libslirp. */ + pthread_mutex_lock(&slirp->no_sockets_lock); + pthread_cond_wait(&slirp->no_sockets_cv, &slirp->no_sockets_lock); + pthread_mutex_unlock(&slirp->no_sockets_lock); + + return sim_slirp_select(slirp, ms_timeout); +#else + /* This is weird: Should never actually be called because we're not inside of + * the reader thread. Punt by sleeping. Not guaranteed to actually have any + * sockets where SIMH needs to poll, so avoid infinite recursion as well. */ + sim_os_sleep(5 /* ms */); + retval = 0; +#endif + } else { + /* Error or shutting down... make the reader thread exit. */ + retval = -1; } return retval; @@ -201,8 +223,9 @@ static int do_poll(SimSlirpNetwork *slirp, int ms_timeout, struct timeval *timeo size_t i; AIO_DEBUG_LOCK; - fprintf(sim_deb, "do_poll(): poll() returns %d\n", retval); - for (i = 0; i < (size_t) slirp->fd_idx; ++i) { + if (sim_deb != NULL) + fprintf(sim_deb, "do_poll(): poll() returns %d\n", retval); + for (i = 0; i < (size_t) slirp->fd_idx && sim_deb != NULL; ++i) { fprintf(sim_deb, "[%3" SIZE_T_FMT "u] fd: %04" SIM_PRIsocket ", events: %04x\n", i, slirp->fds[i].fd, slirp->fds[i].events); } @@ -304,6 +327,19 @@ void register_poll_socket(slirp_os_socket fd, void *opaque) sim_debug(slirp_dbg_mask(slirp, DBG_SOCKET), slirp->dptr, "register_poll_socket(%" SIM_PRIsocket ") index %" SIZE_T_FMT "u\n", fd, i); + + if (sim_atomic_inc(&slirp->n_sockets) == 1) { +#if defined(USE_READER_THREAD) + /* Wake up the reader thread. */ + pthread_cond_broadcast(&slirp->no_sockets_cv); + + sim_debug(slirp_dbg_mask(slirp, DBG_SOCKET), slirp->dptr, + "register_poll_socket(%" SIM_PRIsocket ") waking up reader thread.\n", fd); +#endif + } + + sim_debug(slirp_dbg_mask(slirp, DBG_SOCKET), slirp->dptr, + "register_poll_socket(%" SIM_PRIsocket ") n_sockets = %ld.\n", fd, sim_atomic_get(&slirp->n_sockets)); } /* Reap a disused socket. */ @@ -343,6 +379,12 @@ void unregister_poll_socket(slirp_os_socket fd, void *opaque) sim_messagef(SCPE_OK, "unregister_poll_socket(poll %" SIM_PRIsocket ") not invalidated.\n", fd); } #endif + + /* Keep track of the socket count so that sim_slirp_select() blocks if there are no sockets to + * poll() or select(). */ + if (sim_atomic_get(&slirp->n_sockets) > 0) { + sim_atomic_dec(&slirp->n_sockets); + } } /* Debugging output for add/get event callbacks. */