From f2f73e3de751a30463c91261ab2c709c59f503e7 Mon Sep 17 00:00:00 2001 From: Di Wang Date: Fri, 8 Dec 2023 15:54:09 +0000 Subject: [PATCH 01/18] DAOS-8331 client: add client side metrics 1. Move TLS to common, so both client and server can have TLS, which metrics can be attached to. 2. Add object metrics on the client side, enabled by export DAOS_CLIENT_METRICS=1. Client metrics are organized as "root/jobid/pid/xxxxx", and root/jobid/pid are stored in an independent shared memory region, which will only be destroyed once all jobs are destroyed. During each daos thread initialization, it will create another shmem (pid/xxx), which all metrics of the thread will be attached to. This metric will be destroyed once the thread exits, though if DAOS_CLIENT_METRICS_RETAIN is set, these client metrics will be retained, and they can be retrieved by daos_metrics --jobid. 3. Add DAOS_METRIC_DUMP_ENV to dump metrics from the current thread once it exits. 4. Some fixes in telemetry about conv_ptr when re-opening the shared memory. 5. Add daos_metrics --jobid XXX option to retrieve all metrics of the job. 
Required-githooks: true Signed-off-by: Di Wang --- src/client/api/SConscript | 2 +- src/client/api/init.c | 12 +- src/client/api/metrics.c | 146 +++++++++++++ src/common/SConscript | 2 +- src/common/tls.c | 230 +++++++++++++++++++++ src/engine/SConscript | 2 +- src/engine/init.c | 9 +- src/engine/srv.c | 10 +- src/engine/srv_internal.h | 4 - src/engine/tls.c | 155 -------------- src/gurt/examples/telem_consumer_example.c | 9 +- src/gurt/telemetry.c | 218 +++++++++++-------- src/gurt/tests/test_gurt_telem_producer.c | 11 +- src/include/daos/metric.h | 19 ++ src/include/daos/tls.h | 114 ++++++++++ src/include/daos_srv/daos_engine.h | 81 +------- src/include/gurt/telemetry_common.h | 3 + src/include/gurt/telemetry_consumer.h | 11 +- src/object/cli_mod.c | 98 ++++++++- src/object/cli_shard.c | 132 +++++++++++- src/object/obj_internal.h | 55 +++++ src/object/obj_utils.c | 61 +++++- src/object/srv_internal.h | 28 --- src/object/srv_mod.c | 59 ++---- src/utils/daos_metrics/daos_metrics.c | 142 ++++++++----- 25 files changed, 1143 insertions(+), 470 deletions(-) create mode 100644 src/client/api/metrics.c create mode 100644 src/common/tls.c delete mode 100644 src/engine/tls.c create mode 100644 src/include/daos/metric.h create mode 100644 src/include/daos/tls.h diff --git a/src/client/api/SConscript b/src/client/api/SConscript index 62ba96ef600..43af58a6c86 100644 --- a/src/client/api/SConscript +++ b/src/client/api/SConscript @@ -1,7 +1,7 @@ """Build DAOS client""" LIBDAOS_SRC = ['agent.c', 'array.c', 'container.c', 'event.c', 'init.c', 'job.c', 'kv.c', 'mgmt.c', - 'object.c', 'pool.c', 'rpc.c', 'task.c', 'tx.c', 'pipeline.c'] + 'object.c', 'pool.c', 'rpc.c', 'task.c', 'tx.c', 'pipeline.c', 'metrics.c'] def scons(): diff --git a/src/client/api/init.c b/src/client/api/init.c index da02c71631c..b357e2088da 100644 --- a/src/client/api/init.c +++ b/src/client/api/init.c @@ -23,6 +23,7 @@ #include #include #include +#include #if BUILD_PIPELINE #include #endif @@ -242,19 
+243,25 @@ daos_init(void) if (rc != 0) D_GOTO(out_co, rc); + rc = dc_tm_init(); + if (rc) + D_GOTO(out_obj, rc); + #if BUILD_PIPELINE /** set up pipeline */ rc = dc_pipeline_init(); if (rc != 0) - D_GOTO(out_obj, rc); + D_GOTO(out_tm, rc); #endif module_initialized++; D_GOTO(unlock, rc = 0); #if BUILD_PIPELINE +out_tm: + dc_tm_fini(); +#endif out_obj: dc_obj_fini(); -#endif out_co: dc_cont_fini(); out_pool: @@ -322,6 +329,7 @@ daos_fini(void) D_ERROR("failed to disconnect some resources may leak, " DF_RC"\n", DP_RC(rc)); + dc_tm_fini(); dc_agent_fini(); dc_job_fini(); diff --git a/src/client/api/metrics.c b/src/client/api/metrics.c new file mode 100644 index 00000000000..abdb0d09fd1 --- /dev/null +++ b/src/client/api/metrics.c @@ -0,0 +1,146 @@ +/* + * (C) Copyright 2020-2023 Intel Corporation. + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define INIT_JOB_NUM 1024 +bool daos_client_metric; +bool daos_client_metric_retain; + +#define MAX_IDS_SIZE(num) (num * D_TM_METRIC_SIZE) +/* The client side metrics structure looks like + * root/job_id/pid/.... 
+ */ +int +dc_tm_init(void) +{ + int metrics_tag; + pid_t pid; + int rc; + + d_getenv_bool(DAOS_CLIENT_METRICS_ENV, &daos_client_metric); + if (!daos_client_metric) + return 0; + + rc = dc_tls_key_create(); + if (rc) + D_GOTO(out, rc); + + metrics_tag = D_TM_OPEN_OR_CREATE; + d_getenv_bool(DAOS_CLIENT_METRICS_RETAIN_ENV, &daos_client_metric_retain); + if (daos_client_metric_retain) + metrics_tag |= D_TM_RETAIN_SHMEM; + else + metrics_tag |= D_TM_RETAIN_SHMEM_IF_NON_EMPTY; + + rc = d_tm_init(DC_TM_JOB_ROOT_ID, MAX_IDS_SIZE(INIT_JOB_NUM), metrics_tag); + if (rc != 0) { + DL_ERROR(rc, "init job root id."); + return rc; + } + + pid = getpid(); + D_INFO("INIT %s/%u metrics\n", dc_jobid, pid); + + /** create new shmem space for per-pool metrics */ + rc = d_tm_add_ephemeral_dir(NULL, MAX_IDS_SIZE(INIT_JOB_NUM), "%s/%u", + dc_jobid, pid); + if (rc != 0) { + DL_ERROR(rc, "add metric %s/%u failed.\n", dc_jobid, pid); + D_GOTO(out, rc); + } + +out: + if (rc) + d_tm_fini(); + + return rc; +} + +static void +iter_dump(struct d_tm_context *ctx, struct d_tm_node_t *node, int level, + char *path, int format, int opt_fields, void *arg) +{ + d_tm_print_node(ctx, node, level, path, format, opt_fields, (FILE *)arg); +} + +static int +dump_tm_file(const char *dump_path) +{ + struct d_tm_context *ctx; + struct d_tm_node_t *root; + char dirname[D_TM_MAX_NAME_LEN] = {0}; + uint32_t filter; + FILE *dump_file; + int rc = 0; + + dump_file = fopen(dump_path, "w+"); + if (dump_file == NULL) { + D_INFO("cannot open %s", dump_path); + return -DER_INVAL; + } + + filter = D_TM_COUNTER | D_TM_DURATION | D_TM_TIMESTAMP | D_TM_MEMINFO | + D_TM_TIMER_SNAPSHOT | D_TM_GAUGE | D_TM_STATS_GAUGE; + + ctx = d_tm_open(DC_TM_JOB_ROOT_ID); + if (ctx == NULL) + D_GOTO(close, rc = -DER_NOMEM); + + snprintf(dirname, sizeof(dirname), "%s/%u", dc_jobid, getpid()); + root = d_tm_find_metric(ctx, dirname); + if (root == NULL) { + printf("No metrics found at: '%s'\n", dirname); + D_GOTO(close_ctx, rc = -DER_NONEXIST); 
+ } + + d_tm_print_field_descriptors(0, dump_file); + + d_tm_iterate(ctx, root, 0, filter, NULL, D_TM_CSV, 0, iter_dump, dump_file); + +close_ctx: + d_tm_close(&ctx); +close: + fclose(dump_file); + return rc; +} + +void +dc_tm_fini() +{ + pid_t pid = getpid(); + char *dump_path; + int rc; + + if (!daos_client_metric) + return; + + dump_path = getenv(METRIC_DUMP_ENV); + D_INFO("dump path is %s\n", dump_path); + if (dump_path != NULL) + dump_tm_file(dump_path); + + dc_tls_fini(); + dc_tls_key_delete(); + + if (!daos_client_metric_retain) { + rc = d_tm_del_ephemeral_dir("%s/%d", dc_jobid, pid); + if (rc != 0) + DL_ERROR(rc, "delete tm directory %s/%d.", dc_jobid, pid); + } + + D_INFO("delete pid %s/%u\n", dc_jobid, pid); + d_tm_fini(); +} diff --git a/src/common/SConscript b/src/common/SConscript index 151ba5f0ed4..432b72403e5 100644 --- a/src/common/SConscript +++ b/src/common/SConscript @@ -9,7 +9,7 @@ COMMON_FILES = ['debug.c', 'mem.c', 'fail_loc.c', 'lru.c', 'dedup.c', 'profile.c', 'compression.c', 'compression_isal.c', 'compression_qat.c', 'multihash.c', 'multihash_isal.c', 'cipher.c', 'cipher_isal.c', 'qat.c', 'fault_domain.c', - 'policy.c'] + 'policy.c', 'tls.c'] def build_daos_common(denv, client): diff --git a/src/common/tls.c b/src/common/tls.c new file mode 100644 index 00000000000..68bdef8a4db --- /dev/null +++ b/src/common/tls.c @@ -0,0 +1,230 @@ +/** + * (C) Copyright 2016-2023 Intel Corporation. + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ +/** + * It implements thread-local storage (TLS) for DAOS. + */ +#include +#include + +/* The array remember all of registered module keys on one node. 
*/ +static struct daos_module_key *daos_module_keys[DAOS_MODULE_KEYS_NR] = { NULL }; +pthread_mutex_t daos_module_keys_lock = PTHREAD_MUTEX_INITIALIZER; + +static __thread bool dc_tls_thread_init; + +static pthread_key_t dss_tls_key; +static pthread_key_t dc_tls_key; + +void +daos_register_key(struct daos_module_key *key) +{ + int i; + + D_MUTEX_LOCK(&daos_module_keys_lock); + for (i = 0; i < DAOS_MODULE_KEYS_NR; i++) { + if (daos_module_keys[i] == NULL) { + daos_module_keys[i] = key; + key->dmk_index = i; + break; + } + } + D_MUTEX_UNLOCK(&daos_module_keys_lock); + D_ASSERT(i < DAOS_MODULE_KEYS_NR); +} + +void +daos_unregister_key(struct daos_module_key *key) +{ + if (key == NULL) + return; + D_ASSERT(key->dmk_index >= 0); + D_ASSERT(key->dmk_index < DAOS_MODULE_KEYS_NR); + D_MUTEX_LOCK(&daos_module_keys_lock); + daos_module_keys[key->dmk_index] = NULL; + D_MUTEX_UNLOCK(&daos_module_keys_lock); +} + +struct daos_module_key* +daos_get_module_key(int index) +{ + D_ASSERT(index < DAOS_MODULE_KEYS_NR); + D_ASSERT(index >= 0); + + return daos_module_keys[index]; +} + +static int +daos_thread_local_storage_init(struct daos_thread_local_storage *dtls, + int xs_id, int tgt_id) +{ + int rc = 0; + int i; + + if (dtls->dtls_values == NULL) { + D_ALLOC_ARRAY(dtls->dtls_values, DAOS_MODULE_KEYS_NR); + if (dtls->dtls_values == NULL) + return -DER_NOMEM; + } + + for (i = 0; i < DAOS_MODULE_KEYS_NR; i++) { + struct daos_module_key *dmk = daos_module_keys[i]; + + if (dmk != NULL && dtls->dtls_tag & dmk->dmk_tags) { + D_ASSERT(dmk->dmk_init != NULL); + dtls->dtls_values[i] = dmk->dmk_init(dtls->dtls_tag, xs_id, tgt_id); + if (dtls->dtls_values[i] == NULL) { + rc = -DER_NOMEM; + break; + } + } + } + return rc; +} + +static void +daos_thread_local_storage_fini(struct daos_thread_local_storage *dtls) +{ + int i; + + if (dtls->dtls_values != NULL) { + for (i = DAOS_MODULE_KEYS_NR - 1; i >= 0; i--) { + struct daos_module_key *dmk = daos_module_keys[i]; + + if (dmk != NULL && 
dtls->dtls_tag & dmk->dmk_tags) { + D_ASSERT(dtls->dtls_values[i] != NULL); + D_ASSERT(dmk->dmk_fini != NULL); + dmk->dmk_fini(dtls->dtls_tag, dtls->dtls_values[i]); + } + } + } + + D_FREE(dtls->dtls_values); +} + + +/* + * Allocate daos_thread_local_storage for a particular thread on server and + * store the pointer in a thread-specific value which can be fetched at any + * time with daos_tls_get(). + */ +static struct daos_thread_local_storage * +daos_tls_init(int tag, int xs_id, int tgt_id, bool server) +{ + struct daos_thread_local_storage *dtls; + int rc; + + D_ALLOC_PTR(dtls); + if (dtls == NULL) + return NULL; + + dtls->dtls_tag = tag; + rc = daos_thread_local_storage_init(dtls, xs_id, tgt_id); + if (rc != 0) { + D_FREE(dtls); + return NULL; + } + + if (server) { + rc = pthread_setspecific(dss_tls_key, dtls); + } else { + rc = pthread_setspecific(dc_tls_key, dtls); + if (rc == 0) + dc_tls_thread_init = true; + } + + if (rc) { + D_ERROR("failed to initialize tls: %d\n", rc); + daos_thread_local_storage_fini(dtls); + D_FREE(dtls); + return NULL; + } + + return dtls; +} + +int +ds_tls_key_create(void) +{ + return pthread_key_create(&dss_tls_key, NULL); +} + +int +dc_tls_key_create(void) +{ + return pthread_key_create(&dc_tls_key, NULL); +} + +void +ds_tls_key_delete() +{ + pthread_key_delete(dss_tls_key); +} + +void +dc_tls_key_delete(void) +{ + pthread_key_delete(dc_tls_key); +} + +/* Free DTC for a particular thread. */ +static void +daos_tls_fini(struct daos_thread_local_storage *dtls, bool server) +{ + daos_thread_local_storage_fini(dtls); + D_FREE(dtls); + if (server) + pthread_setspecific(dss_tls_key, NULL); + else + pthread_setspecific(dc_tls_key, NULL); +} + +/* Allocate local per thread storage. */ +struct daos_thread_local_storage * +dc_tls_init(int tag, uint32_t pid) +{ + return daos_tls_init(tag, -1, pid, false); +} + +/* Free DTC for a particular thread. 
*/ +void +dc_tls_fini(void) +{ + struct daos_thread_local_storage *dtls; + + dtls = (struct daos_thread_local_storage *)pthread_getspecific(dc_tls_key); + if (dtls != NULL) + daos_tls_fini(dtls, false); +} + +struct daos_thread_local_storage * +dc_tls_get(unsigned int tag) +{ + if (!dc_tls_thread_init) + return dc_tls_init(tag, getpid()); + + return (struct daos_thread_local_storage *)pthread_getspecific(dc_tls_key); +} + +struct daos_thread_local_storage * +dss_tls_get() +{ + return (struct daos_thread_local_storage *) + pthread_getspecific(dss_tls_key); +} + +/* Allocate local per thread storage. */ +struct daos_thread_local_storage * +dss_tls_init(int tag, int xs_id, int tgt_id) +{ + return daos_tls_init(tag, xs_id, tgt_id, true); +} + +/* Free DTC for a particular thread. */ +void +dss_tls_fini(struct daos_thread_local_storage *dtls) +{ + daos_tls_fini(dtls, true); +} diff --git a/src/engine/SConscript b/src/engine/SConscript index ceb00a409d0..e94b6a83dd6 100644 --- a/src/engine/SConscript +++ b/src/engine/SConscript @@ -29,7 +29,7 @@ def scons(): 'drpc_handler.c', 'drpc_listener.c', 'drpc_progress.c', 'init.c', 'module.c', 'srv_cli.c', 'profile.c', 'rpc.c', - 'server_iv.c', 'srv.c', 'srv.pb-c.c', 'tls.c', + 'server_iv.c', 'srv.c', 'srv.pb-c.c', 'sched.c', 'ult.c', 'event.pb-c.c', 'srv_metrics.c'] + libdaos_tgts diff --git a/src/engine/init.c b/src/engine/init.c index eb3bca9edb1..6b2125e8119 100644 --- a/src/engine/init.c +++ b/src/engine/init.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "srv_internal.h" #include "drpc_internal.h" #include @@ -618,14 +619,14 @@ server_id_cb(uint32_t *tid, uint64_t *uid) } if (tid != NULL) { - struct dss_thread_local_storage *dtc; - struct dss_module_info *dmi; + struct daos_thread_local_storage *dtc; + struct daos_module_info *dmi; int index = daos_srv_modkey.dmk_index; - /* Avoid assertion in dss_module_key_get() */ + /* Avoid assertion in daos_module_key_get() */ dtc = dss_tls_get(); if (dtc != NULL && 
index >= 0 && index < DAOS_MODULE_KEYS_NR && - dss_module_keys[index] == &daos_srv_modkey) { + daos_get_module_key(index) == &daos_srv_modkey) { dmi = dss_get_module_info(); if (dmi != NULL) *tid = dmi->dmi_xs_id; diff --git a/src/engine/srv.c b/src/engine/srv.c index aa6cbd706e8..ff6629c362a 100644 --- a/src/engine/srv.c +++ b/src/engine/srv.c @@ -382,9 +382,9 @@ wait_all_exited(struct dss_xstream *dx, struct dss_module_info *dmi) static void dss_srv_handler(void *arg) { - struct dss_xstream *dx = (struct dss_xstream *)arg; - struct dss_thread_local_storage *dtc; - struct dss_module_info *dmi; + struct dss_xstream *dx = (struct dss_xstream *)arg; + struct daos_thread_local_storage *dtc; + struct dss_module_info *dmi; int rc; bool track_mem = false; bool signal_caller = true; @@ -1292,7 +1292,7 @@ dss_srv_fini(bool force) vos_standalone_tls_fini(); /* fall through */ case XD_INIT_TLS_REG: - pthread_key_delete(dss_tls_key); + ds_tls_key_delete(); /* fall through */ case XD_INIT_ULT_BARRIER: ABT_cond_free(&xstream_data.xd_ult_barrier); @@ -1389,7 +1389,7 @@ dss_srv_init(void) xstream_data.xd_init_step = XD_INIT_ULT_BARRIER; /* register xstream-local storage key */ - rc = pthread_key_create(&dss_tls_key, NULL); + rc = ds_tls_key_create(); if (rc) { rc = dss_abterr2der(rc); D_ERROR("Failed to register storage key: "DF_RC"\n", DP_RC(rc)); diff --git a/src/engine/srv_internal.h b/src/engine/srv_internal.h index 92504c026ca..d3a06d79db3 100644 --- a/src/engine/srv_internal.h +++ b/src/engine/srv_internal.h @@ -314,10 +314,6 @@ sched_create_thread(struct dss_xstream *dx, void (*func)(void *), void *arg, return dss_abterr2der(rc); } -/* tls.c */ -void dss_tls_fini(struct dss_thread_local_storage *dtls); -struct dss_thread_local_storage *dss_tls_init(int tag, int xs_id, int tgt_id); - /* server_iv.c */ void ds_iv_init(void); void ds_iv_fini(void); diff --git a/src/engine/tls.c b/src/engine/tls.c deleted file mode 100644 index 90ea6cce7c5..00000000000 --- a/src/engine/tls.c 
+++ /dev/null @@ -1,155 +0,0 @@ -/** - * (C) Copyright 2016-2021 Intel Corporation. - * - * SPDX-License-Identifier: BSD-2-Clause-Patent - */ -/** - * This file is part of the DAOS server. It implements thread-local storage - * (TLS) for DAOS service threads. - */ -#define D_LOGFAC DD_FAC(server) - -#include -#include "srv_internal.h" - -/* The array remember all of registered module keys on one node. */ -struct dss_module_key *dss_module_keys[DAOS_MODULE_KEYS_NR] = { NULL }; - -pthread_mutex_t dss_module_keys_lock = PTHREAD_MUTEX_INITIALIZER; - -void -dss_register_key(struct dss_module_key *key) -{ - int i; - - D_MUTEX_LOCK(&dss_module_keys_lock); - for (i = 0; i < DAOS_MODULE_KEYS_NR; i++) { - if (dss_module_keys[i] == NULL) { - dss_module_keys[i] = key; - key->dmk_index = i; - break; - } - } - D_MUTEX_UNLOCK(&dss_module_keys_lock); - D_ASSERT(i < DAOS_MODULE_KEYS_NR); -} - -void -dss_unregister_key(struct dss_module_key *key) -{ - if (key == NULL) - return; - D_ASSERT(key->dmk_index >= 0); - D_ASSERT(key->dmk_index < DAOS_MODULE_KEYS_NR); - D_MUTEX_LOCK(&dss_module_keys_lock); - dss_module_keys[key->dmk_index] = NULL; - D_MUTEX_UNLOCK(&dss_module_keys_lock); -} - -/** - * Init thread context - * - * \param[in]dtls Init the thread context to allocate the - * local thread variable for each module. 
- * - * \retval 0 if initialization succeeds - * \retval negative errno if initialization fails - */ -static int -dss_thread_local_storage_init(struct dss_thread_local_storage *dtls, - int xs_id, int tgt_id) -{ - int rc = 0; - int i; - - if (dtls->dtls_values == NULL) { - D_ALLOC_ARRAY(dtls->dtls_values, - (int)ARRAY_SIZE(dss_module_keys)); - if (dtls->dtls_values == NULL) - return -DER_NOMEM; - } - - for (i = 0; i < DAOS_MODULE_KEYS_NR; i++) { - struct dss_module_key *dmk = dss_module_keys[i]; - - if (dmk != NULL && dtls->dtls_tag & dmk->dmk_tags) { - D_ASSERT(dmk->dmk_init != NULL); - dtls->dtls_values[i] = dmk->dmk_init(dtls->dtls_tag, xs_id, tgt_id); - if (dtls->dtls_values[i] == NULL) { - rc = -DER_NOMEM; - break; - } - } - } - return rc; -} - -/** - * Finish module context - * - * \param[in]dtls Finish the thread context to free the - * local thread variable for each module. - */ -static void -dss_thread_local_storage_fini(struct dss_thread_local_storage *dtls) -{ - int i; - - if (dtls->dtls_values != NULL) { - for (i = DAOS_MODULE_KEYS_NR - 1; i >= 0; i--) { - struct dss_module_key *dmk = dss_module_keys[i]; - - if (dmk != NULL && dtls->dtls_tag & dmk->dmk_tags) { - D_ASSERT(dtls->dtls_values[i] != NULL); - D_ASSERT(dmk->dmk_fini != NULL); - dmk->dmk_fini(dtls->dtls_tag, dtls->dtls_values[i]); - } - } - } - - D_FREE(dtls->dtls_values); -} - -pthread_key_t dss_tls_key; - -/* - * Allocate dss_thread_local_storage for a particular thread and - * store the pointer in a thread-specific value which can be - * fetched at any time with dss_tls_get(). 
- */ -struct dss_thread_local_storage * -dss_tls_init(int tag, int xs_id, int tgt_id) -{ - struct dss_thread_local_storage *dtls; - int rc; - - D_ALLOC_PTR(dtls); - if (dtls == NULL) - return NULL; - - dtls->dtls_tag = tag; - rc = dss_thread_local_storage_init(dtls, xs_id, tgt_id); - if (rc != 0) { - D_FREE(dtls); - return NULL; - } - - rc = pthread_setspecific(dss_tls_key, dtls); - if (rc) { - D_ERROR("failed to initialize tls: %d\n", rc); - dss_thread_local_storage_fini(dtls); - D_FREE(dtls); - return NULL; - } - - return dtls; -} - -/* Free DTC for a particular thread. */ -void -dss_tls_fini(struct dss_thread_local_storage *dtls) -{ - dss_thread_local_storage_fini(dtls); - D_FREE(dtls); - pthread_setspecific(dss_tls_key, NULL); -} diff --git a/src/gurt/examples/telem_consumer_example.c b/src/gurt/examples/telem_consumer_example.c index 53cc0311d7f..25b1a518956 100644 --- a/src/gurt/examples/telem_consumer_example.c +++ b/src/gurt/examples/telem_consumer_example.c @@ -147,6 +147,13 @@ void read_metrics(struct d_tm_context *ctx, struct d_tm_node_t *root, d_tm_list_free(head); } +static void +iter_print(struct d_tm_context *ctx, struct d_tm_node_t *node, int level, + char *path, int format, int opt_fields, void *arg) +{ + d_tm_print_node(ctx, node, level, path, format, opt_fields, (FILE *)arg); +} + int main(int argc, char **argv) { @@ -178,7 +185,7 @@ main(int argc, char **argv) D_TM_DURATION | D_TM_GAUGE | D_TM_DIRECTORY); show_meta = true; d_tm_iterate(ctx, root, 0, filter, NULL, D_TM_STANDARD, - D_TM_INCLUDE_METADATA, D_TM_ITER_READ, stdout); + D_TM_INCLUDE_METADATA, iter_print, stdout); sprintf(dirname, "manually added"); filter = (D_TM_COUNTER | D_TM_TIMESTAMP | D_TM_TIMER_SNAPSHOT | diff --git a/src/gurt/telemetry.c b/src/gurt/telemetry.c index bdd963207bb..a1eb7ce810a 100644 --- a/src/gurt/telemetry.c +++ b/src/gurt/telemetry.c @@ -69,8 +69,9 @@ static struct d_tm_shmem { struct d_tm_context *ctx; /** context for the producer */ struct d_tm_node_t *root; 
/** root node of shmem */ pthread_mutex_t add_lock; /** for synchronized access */ - bool sync_access; /** whether to sync access */ - bool retain; /** retain shmem region on exit */ + uint32_t retain:1, /* retain shmem region during exit */ + sync_access:1, + retain_non_empty:1; /** retain shmem region if it is not empty */ int id; /** Instance ID */ } tm_shmem; @@ -200,6 +201,7 @@ attach_shmem(key_t key, size_t size, int flags, struct d_tm_shmem_hdr **shmem) return -DER_SHMEM_PERMS; } + D_INFO("allocate shmid %d key 0x%x addr %p\n", shmid, key, addr); *shmem = addr; return shmid; } @@ -529,7 +531,7 @@ init_node(struct d_tm_shmem_hdr *shmem, struct d_tm_node_t *node, D_ERROR("cannot allocate node name [%s]\n", name); return -DER_NO_SHMEM; } - strncpy(node->dtn_name, name, buff_len); + strncpy(conv_ptr(shmem, node->dtn_name), name, buff_len); node->dtn_shmem_key = shmem->sh_key; node->dtn_child = NULL; /* may be reinitializing an existing node, in which case we shouldn't @@ -557,6 +559,7 @@ alloc_node(struct d_tm_shmem_hdr *shmem, struct d_tm_node_t **newnode, const char *name) { struct d_tm_node_t *node = NULL; + struct d_tm_node_t *tmp; int rc = DER_SUCCESS; if (shmem == NULL || newnode == NULL || name == NULL) { @@ -569,13 +572,16 @@ alloc_node(struct d_tm_shmem_hdr *shmem, struct d_tm_node_t **newnode, rc = -DER_NO_SHMEM; goto out; } - rc = init_node(shmem, node, name); + + tmp = conv_ptr(shmem, node); + + rc = init_node(shmem, tmp, name); if (rc != 0) goto out; - node->dtn_metric = NULL; - node->dtn_sibling = NULL; - *newnode = node; + tmp->dtn_metric = NULL; + tmp->dtn_sibling = NULL; + *newnode = node; out: return rc; } @@ -624,10 +630,10 @@ add_child(struct d_tm_node_t **newnode, struct d_tm_node_t *parent, * 1) a previously-cleared link node that can be reused, or * 2) the right place to attach a newly allocated node. 
*/ - child = parent->dtn_child; + child = conv_ptr(shmem, parent->dtn_child); while (child != NULL && !is_cleared_link(tm_shmem.ctx, child)) { sibling = child; - child = child->dtn_sibling; + child = conv_ptr(shmem, child->dtn_sibling); } if (is_cleared_link(tm_shmem.ctx, child)) { @@ -657,6 +663,7 @@ add_child(struct d_tm_node_t **newnode, struct d_tm_node_t *parent, else sibling->dtn_sibling = *newnode; + *newnode = conv_ptr(shmem, *newnode); return 0; failure: @@ -772,7 +779,7 @@ destroy_shmem_with_key(key_t key) int d_tm_init(int id, uint64_t mem_size, int flags) { - struct d_tm_shmem_hdr *new_shmem; + struct d_tm_shmem_hdr *new_shmem = NULL; key_t key; int shmid; char tmp[D_TM_MAX_NAME_LEN]; @@ -780,31 +787,47 @@ d_tm_init(int id, uint64_t mem_size, int flags) memset(&tm_shmem, 0, sizeof(tm_shmem)); - if ((flags & ~(D_TM_SERIALIZATION | D_TM_RETAIN_SHMEM)) != 0) { - D_ERROR("Invalid flags\n"); + if ((flags & ~(D_TM_SERIALIZATION | D_TM_RETAIN_SHMEM | + D_TM_RETAIN_SHMEM_IF_NON_EMPTY | D_TM_OPEN_OR_CREATE)) != 0) { + D_ERROR("Invalid flags 0x%x\n", flags); rc = -DER_INVAL; goto failure; } if (flags & D_TM_SERIALIZATION) { - tm_shmem.sync_access = true; + tm_shmem.sync_access = 1; D_INFO("Serialization enabled for id %d\n", id); } if (flags & D_TM_RETAIN_SHMEM) { - tm_shmem.retain = true; + tm_shmem.retain = 1; D_INFO("Retaining shared memory for id %d\n", id); } + if (flags & D_TM_RETAIN_SHMEM_IF_NON_EMPTY) { + tm_shmem.retain_non_empty = 1; + D_INFO("Retaining shared memory for id %d if not empty\n", id); + } + tm_shmem.id = id; snprintf(tmp, sizeof(tmp), "ID: %d", id); key = d_tm_get_srv_key(id); - rc = destroy_shmem_with_key(key); - if (rc != 0) - goto failure; - rc = create_shmem(tmp, key, mem_size, &shmid, &new_shmem); - if (rc != 0) - goto failure; + if (flags & D_TM_OPEN_OR_CREATE) { + rc = open_shmem(key, &new_shmem); + if (rc > 0) { + D_ASSERT(new_shmem != NULL); + shmid = rc; + } + } + + if (new_shmem == NULL) { + rc = destroy_shmem_with_key(key); + 
if (rc != 0) + goto failure; + rc = create_shmem(tmp, key, mem_size, &shmid, &new_shmem); + if (rc != 0) + goto failure; + } rc = alloc_ctx(&tm_shmem.ctx, new_shmem, shmid); if (rc != 0) @@ -837,13 +860,21 @@ d_tm_init(int id, uint64_t mem_size, int flags) void d_tm_fini(void) { - bool destroy_shmem = false; + bool destroy_shmem = true; if (tm_shmem.ctx == NULL) goto out; - if (!tm_shmem.retain) - destroy_shmem = true; + if (tm_shmem.retain) + destroy_shmem = false; + + if (tm_shmem.retain_non_empty) { + struct d_tm_node_t *root; + + root = d_tm_get_root(tm_shmem.ctx); + if (root->dtn_child != NULL) + destroy_shmem = false; + } /* close with the option to destroy the shmem region if needed */ close_all_shmem(tm_shmem.ctx, destroy_shmem); @@ -1452,9 +1483,9 @@ _reset_node(struct d_tm_context *ctx, struct d_tm_node_t *node) return DER_SUCCESS; } -static void -reset_node(struct d_tm_context *ctx, struct d_tm_node_t *node, int level, - char *path, int format, int opt_fields, FILE *stream) +void +d_tm_reset_node(struct d_tm_context *ctx, struct d_tm_node_t *node, int level, + char *path, int format, int opt_fields, FILE *stream) { char *name = NULL; @@ -1468,7 +1499,7 @@ reset_node(struct d_tm_context *ctx, struct d_tm_node_t *node, int level, switch (node->dtn_type) { case D_TM_LINK: node = d_tm_follow_link(ctx, node); - reset_node(ctx, node, level, path, format, opt_fields, stream); + d_tm_reset_node(ctx, node, level, path, format, opt_fields, stream); break; case D_TM_DIRECTORY: case D_TM_COUNTER: @@ -1508,20 +1539,19 @@ reset_node(struct d_tm_context *ctx, struct d_tm_node_t *node, int level, * Choose D_TM_CSV for comma separated values. * \param[in] opt_fields A bitmask. Set D_TM_INCLUDE_* as desired for * the optional output fields. - * \param[in] show_timestamp Set to true to print the timestamp the metric - * was read by the consumer. - * \param[in] stream Direct output to this stream (stdout, stderr) + * \param[in] iter_cb iterate callback. 
+ * \param[in] cb_arg argument for iterate callback. */ void d_tm_iterate(struct d_tm_context *ctx, struct d_tm_node_t *node, int level, int filter, char *path, int format, - int opt_fields, uint32_t ops, FILE *stream) + int opt_fields, d_tm_iter_cb_t iter_cb, void *cb_arg) { struct d_tm_shmem_hdr *shmem = NULL; char *fullpath = NULL; char *parent_name = NULL; - if ((node == NULL) || (stream == NULL)) + if (node == NULL) return; if (node->dtn_type == D_TM_LINK) { @@ -1534,14 +1564,8 @@ d_tm_iterate(struct d_tm_context *ctx, struct d_tm_node_t *node, if (shmem == NULL) return; - if (node->dtn_type & filter) { - if (ops & D_TM_ITER_READ) - d_tm_print_node(ctx, node, level, path, format, - opt_fields, stream); - if (ops & D_TM_ITER_RESET) - reset_node(ctx, node, level, path, format, - opt_fields, stream); - } + if (node->dtn_type & filter) + iter_cb(ctx, node, level, path, format, opt_fields, cb_arg); parent_name = conv_ptr(shmem, node->dtn_name); node = node->dtn_child; @@ -1557,7 +1581,7 @@ d_tm_iterate(struct d_tm_context *ctx, struct d_tm_node_t *node, D_ASPRINTF(fullpath, "%s/%s", path, parent_name); d_tm_iterate(ctx, node, level + 1, filter, fullpath, format, - opt_fields, ops, stream); + opt_fields, iter_cb, cb_arg); D_FREE(fullpath); node = node->dtn_sibling; node = conv_ptr(shmem, node); @@ -2106,6 +2130,29 @@ is_initialized(void) tm_shmem.ctx->shmem_root != NULL; } +/* + * Get a pointer to the last token in the path without modifying the original + * string. 
+ */ +static const char * +get_last_token(const char *path) +{ + const char *substr = path; + const char *ch; + bool next_token = false; + + for (ch = path; *ch != '\0'; ch++) { + if (*ch == '/') { + next_token = true; + } else if (next_token) { + substr = ch; + next_token = false; + } + } + + return substr; +} + static int add_metric(struct d_tm_context *ctx, struct d_tm_node_t **node, int metric_type, char *desc, char *units, char *path) @@ -2114,6 +2161,7 @@ add_metric(struct d_tm_context *ctx, struct d_tm_node_t **node, int metric_type, struct d_tm_node_t *parent_node; struct d_tm_node_t *temp = NULL; struct d_tm_shmem_hdr *shmem; + struct d_tm_metric_t *metric; char *token; char *rest; char *unit_string; @@ -2155,11 +2203,11 @@ add_metric(struct d_tm_context *ctx, struct d_tm_node_t **node, int metric_type, } } - temp->dtn_metric->dtm_stats = NULL; + metric = conv_ptr(shmem, temp->dtn_metric); + metric->dtm_stats = NULL; if (has_stats(temp)) { - temp->dtn_metric->dtm_stats = - shmalloc(shmem, sizeof(struct d_tm_stats_t)); - if (temp->dtn_metric->dtm_stats == NULL) { + metric->dtm_stats = shmalloc(shmem, sizeof(struct d_tm_stats_t)); + if (metric->dtm_stats == NULL) { rc = -DER_NO_SHMEM; goto out; } @@ -2176,14 +2224,14 @@ add_metric(struct d_tm_context *ctx, struct d_tm_node_t **node, int metric_type, if (buff_len > 0) { buff_len += 1; /** make room for the trailing null */ - temp->dtn_metric->dtm_desc = shmalloc(shmem, buff_len); - if (temp->dtn_metric->dtm_desc == NULL) { + metric->dtm_desc = shmalloc(shmem, buff_len); + if (metric->dtm_desc == NULL) { rc = -DER_NO_SHMEM; goto out; } - strncpy(temp->dtn_metric->dtm_desc, desc, buff_len); + strncpy(conv_ptr(shmem, metric->dtm_desc), desc, buff_len); } else { - temp->dtn_metric->dtm_desc = NULL; + metric->dtm_desc = NULL; } unit_string = units; @@ -2217,14 +2265,14 @@ add_metric(struct d_tm_context *ctx, struct d_tm_node_t **node, int metric_type, if (buff_len > 0) { buff_len += 1; /** make room for the 
trailing null */ - temp->dtn_metric->dtm_units = shmalloc(shmem, buff_len); - if (temp->dtn_metric->dtm_units == NULL) { + metric->dtm_units = shmalloc(shmem, buff_len); + if (metric->dtm_units == NULL) { rc = -DER_NO_SHMEM; goto out; } - strncpy(temp->dtn_metric->dtm_units, unit_string, buff_len); + strncpy(conv_ptr(shmem, metric->dtm_units), unit_string, buff_len); } else { - temp->dtn_metric->dtm_units = NULL; + metric->dtm_units = NULL; } temp->dtn_protect = false; @@ -2359,12 +2407,17 @@ static int get_free_region_entry(struct d_tm_shmem_hdr *shmem, struct shmem_region_list **entry) { + d_list_t *cur; + d_list_t *head; + d_list_t *next; struct shmem_region_list *tmp; D_ASSERT(shmem != NULL); D_ASSERT(entry != NULL); - d_list_for_each_entry(tmp, &shmem->sh_subregions, rl_link) { + head = &shmem->sh_subregions; + for (cur = conv_ptr(shmem, head->next); cur != head; cur = conv_ptr(shmem, cur->next)) { + tmp = d_list_entry(cur, __typeof__(*tmp), rl_link); if (tmp->rl_link_node == NULL) { *entry = tmp; return 0; @@ -2377,7 +2430,17 @@ get_free_region_entry(struct d_tm_shmem_hdr *shmem, shmem->sh_key); return -DER_NO_SHMEM; } - d_list_add(&tmp->rl_link, &shmem->sh_subregions); + + next = conv_ptr(shmem, head->next); + cur = head->next; + + head->next = &tmp->rl_link; + next->prev = &tmp->rl_link; + + tmp = conv_ptr(shmem, tmp); + tmp->rl_link.next = cur; + tmp->rl_link.prev = (d_list_t *)(shmem->sh_base_addr + + (uint64_t)(&((struct d_tm_shmem_hdr *)(0))->sh_subregions)); *entry = tmp; return 0; @@ -2413,29 +2476,6 @@ get_unique_shmem_key(const char *path, int id) return (key_t)d_hash_string_u32(salted, sizeof(salted)); } -/* - * Get a pointer to the last token in the path without modifying the original - * string. 
- */ -static const char * -get_last_token(const char *path) -{ - const char *substr = path; - const char *ch; - bool next_token = false; - - for (ch = path; *ch != '\0'; ch++) { - if (*ch == '/') { - next_token = true; - } else if (next_token) { - substr = ch; - next_token = false; - } - } - - return substr; -} - /** * Creates a directory in the metric tree at the path designated by fmt that * can be deleted later, with all its children. @@ -2460,6 +2500,7 @@ d_tm_add_ephemeral_dir(struct d_tm_node_t **node, size_t size_bytes, struct d_tm_context *ctx = tm_shmem.ctx; struct d_tm_shmem_hdr *parent_shmem; struct d_tm_shmem_hdr *new_shmem; + struct d_tm_metric_t *link_metric; struct shmem_region_list *region_entry; va_list args; key_t key; @@ -2522,8 +2563,6 @@ d_tm_add_ephemeral_dir(struct d_tm_node_t **node, size_t size_bytes, D_ERROR("can't set up the link node, " DF_RC "\n", DP_RC(rc)); D_GOTO(fail_tracking, rc); } - D_ASSERT(link_node->dtn_type == D_TM_LINK); - link_node->dtn_metric->dtm_data.value = key; /* track attached regions within the parent shmem */ parent_shmem = get_shmem_for_key(ctx, link_node->dtn_shmem_key); @@ -2531,6 +2570,11 @@ d_tm_add_ephemeral_dir(struct d_tm_node_t **node, size_t size_bytes, D_ERROR("failed to get parent shmem pointer\n"); D_GOTO(fail_link, rc = -DER_NO_SHMEM); } + + D_ASSERT(link_node->dtn_type == D_TM_LINK); + link_metric = conv_ptr(parent_shmem, link_node->dtn_metric); + link_metric->dtm_data.value = key; + rc = get_free_region_entry(parent_shmem, ®ion_entry); if (rc != 0) D_GOTO(fail_link, rc); @@ -2562,9 +2606,13 @@ d_tm_add_ephemeral_dir(struct d_tm_node_t **node, size_t size_bytes, static void clear_region_entry_for_key(struct d_tm_shmem_hdr *shmem, key_t key) { + d_list_t *cur; + d_list_t *head; struct shmem_region_list *tmp; - d_list_for_each_entry(tmp, &shmem->sh_subregions, rl_link) { + head = &shmem->sh_subregions; + for (cur = conv_ptr(shmem, head->next); cur != head; cur = conv_ptr(shmem, cur->next)) { + tmp = 
d_list_entry(cur, __typeof__(*tmp), rl_link); if (tmp->rl_key == key) { D_DEBUG(DB_TRACE, "cleared shmem metadata for key 0x%x\n", key); @@ -2583,6 +2631,8 @@ rm_ephemeral_dir(struct d_tm_context *ctx, struct d_tm_node_t *link) struct d_tm_shmem_hdr *parent_shmem; struct d_tm_shmem_hdr *shmem; struct d_tm_node_t *node; + d_list_t *cur; + d_list_t *head; struct shmem_region_list *curr; key_t key; int rc = 0; @@ -2616,7 +2666,9 @@ rm_ephemeral_dir(struct d_tm_context *ctx, struct d_tm_node_t *link) } /* delete sub-regions recursively */ - d_list_for_each_entry(curr, &shmem->sh_subregions, rl_link) { + head = &shmem->sh_subregions; + for (cur = conv_ptr(shmem, head->next); cur != head; cur = conv_ptr(shmem, cur->next)) { + curr = d_list_entry(cur, __typeof__(*curr), rl_link); rc = rm_ephemeral_dir(ctx, curr->rl_link_node); if (rc != 0) /* nothing much we can do to recover here */ D_ERROR("error removing tmp dir [%s]: "DF_RC"\n", @@ -3669,7 +3721,7 @@ shmalloc(struct d_tm_shmem_hdr *shmem, int length) D_DEBUG(DB_TRACE, "Allocated %d bytes. 
Now %" PRIu64 " remain\n", length, shmem->sh_bytes_free); - memset(new_mem, 0, length); + memset(conv_ptr(shmem, new_mem), 0, length); return new_mem; } diff --git a/src/gurt/tests/test_gurt_telem_producer.c b/src/gurt/tests/test_gurt_telem_producer.c index 0a1731c607d..79a2af1a6f4 100644 --- a/src/gurt/tests/test_gurt_telem_producer.c +++ b/src/gurt/tests/test_gurt_telem_producer.c @@ -1226,6 +1226,13 @@ test_verify_object_count(void **state) assert_int_equal(num, exp_total); } +static void +iter_print(struct d_tm_context *ctx, struct d_tm_node_t *node, int level, + char *path, int format, int opt_fields, void *arg) +{ + d_tm_print_node(ctx, node, level, path, format, opt_fields, (FILE *)arg); +} + static void test_print_metrics(void **state) { @@ -1239,14 +1246,14 @@ test_print_metrics(void **state) D_TM_DURATION | D_TM_GAUGE | D_TM_DIRECTORY); d_tm_iterate(cli_ctx, node, 0, filter, NULL, D_TM_STANDARD, - D_TM_INCLUDE_METADATA, D_TM_ITER_READ, stdout); + D_TM_INCLUDE_METADATA, iter_print, stdout); d_tm_print_field_descriptors(D_TM_INCLUDE_TIMESTAMP | D_TM_INCLUDE_METADATA, stdout); filter &= ~D_TM_DIRECTORY; d_tm_iterate(cli_ctx, node, 0, filter, NULL, D_TM_CSV, - D_TM_INCLUDE_METADATA, D_TM_ITER_READ, stdout); + D_TM_INCLUDE_METADATA, iter_print, stdout); } static void diff --git a/src/include/daos/metric.h b/src/include/daos/metric.h new file mode 100644 index 00000000000..9417b52fdc9 --- /dev/null +++ b/src/include/daos/metric.h @@ -0,0 +1,19 @@ +/* + * (C) Copyright 2020-2023 Intel Corporation. + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ +#ifndef __DAOS_METRIC_H__ +#define __DAOS_METRIC_H__ + +/** + * Called during library initialization to init metrics. 
+ */ +int dc_tm_init(void); + +/** + * Called during library finalization to free metrics resources + */ +void dc_tm_fini(void); + +#endif /* __DAOS_METRIC_H__ */ diff --git a/src/include/daos/tls.h b/src/include/daos/tls.h new file mode 100644 index 00000000000..446ff53c180 --- /dev/null +++ b/src/include/daos/tls.h @@ -0,0 +1,114 @@ +/** + * (C) Copyright 2016-2023 Intel Corporation. + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ +/** + * This file is part of daos + * + * src/include/daos/tls.h + */ + +#ifndef __DAOS_TLS_H__ +#define __DAOS_TLS_H__ + +#include +#include + +/** + * Stackable Module API + * Provides a modular interface to load and register server-side code on + * demand. A module is composed of: + * - a set of request handlers which are registered when the module is loaded. + * - a server-side API (see header files suffixed by "_srv") used for + * inter-module direct calls. + * + * For now, all loaded modules are assumed to be trusted, but sandboxes can be + * implemented in the future. 
+ */ +/* + * Thread-local storage + */ +struct daos_thread_local_storage { + uint32_t dtls_tag; + void **dtls_values; +}; + +enum daos_module_tag { + DAOS_SYS_TAG = 1 << 0, /** only run on system xstream */ + DAOS_TGT_TAG = 1 << 1, /** only run on target xstream */ + DAOS_RDB_TAG = 1 << 2, /** only run on rdb xstream */ + DAOS_OFF_TAG = 1 << 3, /** only run on offload/helper xstream */ + DAOS_CLI_TAG = 1 << 4, /** only run on client stack */ + DAOS_SERVER_TAG = 0xff, /** run on all xstream */ +}; + +/* The module key descriptor for each xstream */ +struct daos_module_key { + /* Indicate where the keys should be instantiated */ + enum daos_module_tag dmk_tags; + + /* The position inside the daos_module_keys */ + int dmk_index; + /* init keys for context */ + void *(*dmk_init)(int tags, int xs_id, int tgt_id); + + /* fini keys for context */ + void (*dmk_fini)(int tags, void *data); +}; + +#define DAOS_MODULE_KEYS_NR 10 +struct daos_thread_local_storage *dss_tls_get(void); +struct daos_thread_local_storage *dc_tls_get(unsigned int tag); + +int ds_tls_key_create(void); +int dc_tls_key_create(void); +void ds_tls_key_delete(void); +void dc_tls_key_delete(void); +/* For now TLS is only enabled if metrics are enabled */ +#define METRIC_DUMP_ENV "DAOS_METRIC_DUMP_ENV" +#define DAOS_CLIENT_METRICS_ENV "DAOS_CLIENT_METRICS" +#define DAOS_CLIENT_METRICS_RETAIN_ENV "DAOS_CLIENT_METRICS_RETAIN" +extern bool daos_client_metric; +extern bool daos_client_metric_retain; +struct daos_module_key* daos_get_module_key(int index); + +/** + * Get value from context by the key + * + * Get value inside dtls by key. So each module will use this API to + * retrieve their own value in the thread context. + * + * \param[in] dtls the thread context. + * \param[in] key key used to retrieve the dtls_value. + * + * \retval the dtls_value retrieved by key. 
+ */ +static inline void * +daos_module_key_get(struct daos_thread_local_storage *dtls, + struct daos_module_key *key) +{ + D_ASSERT(key->dmk_index >= 0); + D_ASSERT(key->dmk_index < DAOS_MODULE_KEYS_NR); + D_ASSERT(daos_get_module_key(key->dmk_index) == key); + D_ASSERT(dtls != NULL); + + return dtls->dtls_values[key->dmk_index]; +} + +#define dss_module_key_get daos_module_key_get +#define dss_register_key daos_register_key +#define dss_unregister_key daos_unregister_key +#define dss_module_info daos_module_info +#define dss_module_tag daos_module_tag +#define dss_module_key daos_module_key +#define dss_thread_local_storage daos_thread_local_storage + +void daos_register_key(struct daos_module_key *key); +void daos_unregister_key(struct daos_module_key *key); +struct daos_thread_local_storage * dc_tls_init(int tag, uint32_t pid); +void dc_tls_fini(void); +struct daos_thread_local_storage * dss_tls_init(int tag, int xs_id, int tgt_id); +void dss_tls_fini(struct daos_thread_local_storage *dtls); + +#endif /*__DAOS_TLS_H__*/ diff --git a/src/include/daos_srv/daos_engine.h b/src/include/daos_srv/daos_engine.h index be491483fbc..db7418a21e9 100644 --- a/src/include/daos_srv/daos_engine.h +++ b/src/include/daos_srv/daos_engine.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -54,84 +55,6 @@ extern unsigned int dss_instance_idx; /** Bypass for the nvme health check */ extern bool dss_nvme_bypass_health_check; -/** - * Stackable Module API - * Provides a modular interface to load and register server-side code on - * demand. A module is composed of: - * - a set of request handlers which are registered when the module is loaded. - * - a server-side API (see header files suffixed by "_srv") used for - * inter-module direct calls. - * - * For now, all loaded modules are assumed to be trustful, but sandboxes can be - * implemented in the future. 
- */ -/* - * Thead-local storage - */ -struct dss_thread_local_storage { - uint32_t dtls_tag; - void **dtls_values; -}; - -enum dss_module_tag { - DAOS_SYS_TAG = 1 << 0, /** only run on system xstream */ - DAOS_TGT_TAG = 1 << 1, /** only run on target xstream */ - DAOS_RDB_TAG = 1 << 2, /** only run on rdb xstream */ - DAOS_OFF_TAG = 1 << 3, /** only run on offload/helper xstream */ - DAOS_SERVER_TAG = 0xff, /** run on all xstream */ -}; - -/* The module key descriptor for each xstream */ -struct dss_module_key { - /* Indicate where the keys should be instantiated */ - enum dss_module_tag dmk_tags; - - /* The position inside the dss_module_keys */ - int dmk_index; - /* init keys for context */ - void *(*dmk_init)(int tags, int xs_id, int tgt_id); - - /* fini keys for context */ - void (*dmk_fini)(int tags, void *data); -}; - -extern pthread_key_t dss_tls_key; -extern struct dss_module_key *dss_module_keys[]; -#define DAOS_MODULE_KEYS_NR 10 - -static inline struct dss_thread_local_storage * -dss_tls_get() -{ - return (struct dss_thread_local_storage *) - pthread_getspecific(dss_tls_key); -} - -/** - * Get value from context by the key - * - * Get value inside dtls by key. So each module will use this API to - * retrieve their own value in the thread context. - * - * \param[in] dtls the thread context. - * \param[in] key key used to retrieve the dtls_value. - * - * \retval the dtls_value retrieved by key. 
- */ -static inline void * -dss_module_key_get(struct dss_thread_local_storage *dtls, - struct dss_module_key *key) -{ - D_ASSERT(key->dmk_index >= 0); - D_ASSERT(key->dmk_index < DAOS_MODULE_KEYS_NR); - D_ASSERT(dss_module_keys[key->dmk_index] == key); - D_ASSERT(dtls != NULL); - - return dtls->dtls_values[key->dmk_index]; -} - -void dss_register_key(struct dss_module_key *key); -void dss_unregister_key(struct dss_module_key *key); - /** pthread names are limited to 16 chars */ #define DSS_XS_NAME_LEN (32) @@ -172,7 +95,7 @@ static inline struct dss_module_info * dss_get_module_info(void) { struct dss_module_info *dmi; - struct dss_thread_local_storage *dtc; + struct daos_thread_local_storage *dtc; dtc = dss_tls_get(); dmi = (struct dss_module_info *) diff --git a/src/include/gurt/telemetry_common.h b/src/include/gurt/telemetry_common.h index 983ec2553f2..a3ba8902010 100644 --- a/src/include/gurt/telemetry_common.h +++ b/src/include/gurt/telemetry_common.h @@ -155,6 +155,8 @@ enum { D_TM_SERVER_PROCESS = 0x000, D_TM_SERIALIZATION = 0x001, D_TM_RETAIN_SHMEM = 0x002, + D_TM_RETAIN_SHMEM_IF_NON_EMPTY = 0x004, + D_TM_OPEN_OR_CREATE = 0x008, }; /** Output formats */ @@ -176,6 +178,7 @@ enum { D_TM_ITER_RESET = 0x002, }; +#define DC_TM_JOB_ROOT_ID 256 /** * @brief Statistics for gauge and duration metrics * diff --git a/src/include/gurt/telemetry_consumer.h b/src/include/gurt/telemetry_consumer.h index f0b1d706be7..9b8de3d70fa 100644 --- a/src/include/gurt/telemetry_consumer.h +++ b/src/include/gurt/telemetry_consumer.h @@ -49,12 +49,21 @@ int d_tm_list(struct d_tm_context *ctx, struct d_tm_nodeList_t **head, int d_tm_list_subdirs(struct d_tm_context *ctx, struct d_tm_nodeList_t **head, struct d_tm_node_t *node, uint64_t *node_count, int max_depth); + +typedef void (*d_tm_iter_cb_t)(struct d_tm_context *ctx, struct d_tm_node_t *node, + int level, char *path, int format, int opt_fields, + void *cb_arg); + void d_tm_iterate(struct d_tm_context *ctx, struct d_tm_node_t 
*node, int level, int filter, char *path, int format, - int opt_fields, uint32_t ops, FILE *stream); + int opt_fields, d_tm_iter_cb_t iter_cb, void *cb_arg); void d_tm_print_node(struct d_tm_context *ctx, struct d_tm_node_t *node, int level, char *name, int format, int opt_fields, FILE *stream); + +void d_tm_reset_node(struct d_tm_context *ctx, struct d_tm_node_t *node, int level, + char *path, int format, int opt_fields, FILE *stream); + void d_tm_print_field_descriptors(int opt_fields, FILE *stream); void d_tm_print_counter(uint64_t val, char *name, int format, char *units, int opt_fields, FILE *stream); diff --git a/src/object/cli_mod.c b/src/object/cli_mod.c index 79c13fee948..97fcdec2372 100644 --- a/src/object/cli_mod.c +++ b/src/object/cli_mod.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2016-2022 Intel Corporation. + * (C) Copyright 2016-2023 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -12,6 +12,10 @@ #include #include #include +#include +#include +#include +#include #include #include "obj_rpc.h" #include "obj_internal.h" @@ -19,14 +23,99 @@ unsigned int srv_io_mode = DIM_DTX_FULL_ENABLED; int dc_obj_proto_version; +static void* +dc_obj_tls_init(int tags, int xs_id, int pid) +{ + struct dc_obj_tls *tls; + int opc; + int rc; + unsigned long tid = pthread_self(); + + D_ALLOC_PTR(tls); + if (tls == NULL) + return NULL; + + /** register different per-opcode sensors */ + for (opc = 0; opc < OBJ_PROTO_CLI_COUNT; opc++) { + /** Start with number of active requests, of type gauge */ + rc = d_tm_add_metric(&tls->cot_op_active[opc], D_TM_STATS_GAUGE, + "number of active object RPCs", "ops", + "%s/%u/%lu/ops/%s/active", dc_jobid, pid, tid, + obj_opc_to_str(opc)); + if (rc) { + D_WARN("Failed to create active counter: "DF_RC"\n", DP_RC(rc)); + D_GOTO(out, rc); + } + + if (opc == DAOS_OBJ_RPC_UPDATE || + opc == DAOS_OBJ_RPC_TGT_UPDATE || + opc == DAOS_OBJ_RPC_FETCH) + /** See below, latency reported per size for those */ + continue; + + /** And 
finally the per-opcode latency, of type gauge */ + rc = d_tm_add_metric(&tls->cot_op_lat[opc], D_TM_STATS_GAUGE, + "object RPC processing time", "us", + "%s/%u/%lu/ops/%s/latency", dc_jobid, pid, tid, + obj_opc_to_str(opc)); + if (rc) { + D_WARN("Failed to create latency sensor: "DF_RC"\n", DP_RC(rc)); + D_GOTO(out, rc); + } + } + + /** + * Maintain per-I/O size latency for update & fetch RPCs + * of type gauge + */ + rc = obj_latency_tm_init(DAOS_OBJ_RPC_UPDATE, pid, tls->cot_update_lat, + obj_opc_to_str(DAOS_OBJ_RPC_UPDATE), + "update RPC processing time", false); + if (rc) + D_GOTO(out, rc); + + rc = obj_latency_tm_init(DAOS_OBJ_RPC_FETCH, pid, tls->cot_fetch_lat, + obj_opc_to_str(DAOS_OBJ_RPC_FETCH), + "fetch RPC processing time", false); + if (rc) + D_GOTO(out, rc); + +out: + if (rc) { + D_FREE(tls); + tls = NULL; + } + + return tls; +} + +static void +dc_obj_tls_fini(int tags, void *data) +{ + struct dc_obj_tls *tls = data; + + D_FREE(tls); +} + +struct daos_module_key dc_obj_module_key = { + .dmk_tags = DAOS_CLI_TAG, + .dmk_index = -1, + .dmk_init = dc_obj_tls_init, + .dmk_fini = dc_obj_tls_fini, +}; + /** * Initialize object interface */ int dc_obj_init(void) { - uint32_t ver_array[2] = {DAOS_OBJ_VERSION - 1, DAOS_OBJ_VERSION}; - int rc; + uint32_t ver_array[2] = {DAOS_OBJ_VERSION - 1, DAOS_OBJ_VERSION}; + int rc; + + d_getenv_bool(DAOS_CLIENT_METRICS_ENV, &daos_client_metric); + if (daos_client_metric) + daos_register_key(&dc_obj_module_key); rc = obj_utils_init(); if (rc) @@ -78,6 +167,7 @@ dc_obj_init(void) out_utils: if (rc) obj_utils_fini(); + return rc; } @@ -94,4 +184,6 @@ dc_obj_fini(void) obj_ec_codec_fini(); obj_class_fini(); obj_utils_fini(); + if (daos_client_metric) + daos_unregister_key(&dc_obj_module_key); } diff --git a/src/object/cli_shard.c b/src/object/cli_shard.c index 2dd9ef9ac39..dec1bec1363 100644 --- a/src/object/cli_shard.c +++ b/src/object/cli_shard.c @@ -14,6 +14,8 @@ #include #include #include +#include +#include #include 
"cli_csum.h" #include "obj_rpc.h" #include "obj_internal.h" @@ -105,6 +107,7 @@ struct rw_cb_args { daos_iom_t *maps; crt_endpoint_t tgt_ep; struct shard_rw_args *shard_args; + uint64_t send_time; }; static d_iov_t * @@ -640,6 +643,94 @@ dc_shard_update_size(struct rw_cb_args *rw_args, int fetch_rc) return rc; } +daos_size_t +obj_get_fetch_size(struct rw_cb_args *arg) +{ + struct obj_rw_v10_out *orwo; + daos_size_t size = 0; + + orwo = crt_reply_get(arg->rpc); + + if (orwo->orw_sgls.ca_count > 0) { + /* inline transfer */ + size = daos_sgls_packed_size(orwo->orw_sgls.ca_arrays, + orwo->orw_sgls.ca_count, NULL); + } else if (arg->rwaa_sgls != NULL) { + /* bulk transfer */ + daos_size_t *replied_sizes = orwo->orw_data_sizes.ca_arrays; + int i; + + for (i = 0; i < orwo->orw_data_sizes.ca_count; i++) + size += replied_sizes[i]; + } + + return size; +} + +static void +obj_shard_update_metrics_begin(crt_rpc_t *rpc) +{ + struct dc_obj_tls *tls; + int opc; + + if (!daos_client_metric) + return; + + tls = dc_obj_tls_get(); + D_ASSERT(tls != NULL); + opc = opc_get(rpc->cr_opc); + d_tm_inc_gauge(tls->cot_op_active[opc], 1); +} + +static void +obj_shard_update_metrics_end(crt_rpc_t *rpc, uint64_t send_time, void *arg, int ret) +{ + struct dc_obj_tls *tls; + struct rw_cb_args *rw_args; + struct obj_rw_in *orw; + struct d_tm_node_t *lat = NULL; + daos_size_t size; + uint64_t time; + int opc; + + if (!daos_client_metric) + return; + + tls = dc_obj_tls_get(); + D_ASSERT(tls != NULL); + opc = opc_get(rpc->cr_opc); + orw = crt_req_get(rpc); + d_tm_dec_gauge(tls->cot_op_active[opc], 1); + + if (ret != 0) + return; + /** + * Measure latency of successful I/O only. + * Use bit shift for performance and tolerate some inaccuracy. 
+ */ + time = daos_get_ntime() - send_time; + time >>= 10; + + switch (opc) { + case DAOS_OBJ_RPC_UPDATE: + rw_args = arg; + size = daos_sgls_packed_size(rw_args->rwaa_sgls, orw->orw_nr, NULL); + lat = tls->cot_update_lat[lat_bucket(size)]; + break; + case DAOS_OBJ_RPC_FETCH: + rw_args = arg; + size = obj_get_fetch_size(rw_args); + lat = tls->cot_fetch_lat[lat_bucket(size)]; + break; + default: + lat = tls->cot_op_lat[opc]; + break; + } + + if (lat != NULL) + d_tm_set_gauge(lat, time); +} + static int dc_rw_cb(tse_task_t *task, void *arg) { @@ -956,10 +1047,15 @@ dc_rw_cb(tse_task_t *task, void *arg) out: if (rc == -DER_CSUM && opc == DAOS_OBJ_RPC_FETCH) dc_shard_csum_report(task, &rw_args->tgt_ep, rw_args->rpc); + + obj_shard_update_metrics_end(rw_args->rpc, rw_args->send_time, rw_args, + ret == 0 ? rc : ret); + crt_req_decref(rw_args->rpc); if (ret == 0 || obj_retry_error(rc)) ret = rc; + return ret; } @@ -1129,7 +1225,9 @@ dc_obj_shard_rw(struct dc_obj_shard *shard, enum obj_rpc_opc opc, rw_args.co = shard->do_co; rw_args.shard_args = args; /* remember the sgl to copyout the data inline for fetch */ - rw_args.rwaa_sgls = (opc == DAOS_OBJ_RPC_FETCH) ? 
sgls : NULL; + rw_args.rwaa_sgls = sgls; + rw_args.send_time = daos_get_ntime(); + obj_shard_update_metrics_begin(req); if (args->reasb_req && args->reasb_req->orr_recov) { rw_args.maps = NULL; orw->orw_flags |= ORF_EC_RECOV; @@ -1189,6 +1287,7 @@ struct obj_punch_cb_args { crt_rpc_t *rpc; unsigned int *map_ver; struct shard_punch_args *shard_args; + uint64_t send_time; }; static int @@ -1217,7 +1316,11 @@ obj_shard_punch_cb(tse_task_t *task, void *data) } } + obj_shard_update_metrics_end(cb_args->rpc, cb_args->send_time, cb_args, + task->dt_result); + crt_req_decref(rpc); + return task->dt_result; } @@ -1262,6 +1365,8 @@ dc_obj_shard_punch(struct dc_obj_shard *shard, enum obj_rpc_opc opc, cb_args.rpc = req; cb_args.map_ver = &args->pa_auxi.map_ver; cb_args.shard_args = args; + cb_args.send_time = daos_get_ntime(); + obj_shard_update_metrics_begin(req); rc = tse_task_register_comp_cb(task, obj_shard_punch_cb, &cb_args, sizeof(cb_args)); if (rc != 0) @@ -1324,6 +1429,7 @@ struct obj_enum_args { struct dtx_epoch *epoch; daos_handle_t *th; uint64_t *enqueue_id; + uint64_t send_time; uint32_t *max_delay; }; @@ -1652,10 +1758,15 @@ dc_enumerate_cb(tse_task_t *task, void *arg) crt_bulk_free(oei->oei_bulk); if (oei->oei_kds_bulk != NULL) crt_bulk_free(oei->oei_kds_bulk); + + obj_shard_update_metrics_end(enum_args->rpc, enum_args->send_time, + enum_args, ret == 0 ? 
rc : ret); + crt_req_decref(enum_args->rpc); if (ret == 0 || obj_retry_error(rc)) ret = rc; + return ret; } @@ -1805,6 +1916,8 @@ dc_obj_shard_list(struct dc_obj_shard *obj_shard, enum obj_rpc_opc opc, enum_args.th = &obj_args->th; enum_args.enqueue_id = &args->la_auxi.enqueue_id; enum_args.max_delay = &args->la_auxi.obj_auxi->max_delay; + enum_args.send_time = daos_get_ntime(); + obj_shard_update_metrics_begin(req); rc = tse_task_register_comp_cb(task, dc_enumerate_cb, &enum_args, sizeof(enum_args)); if (rc != 0) @@ -1838,6 +1951,7 @@ struct obj_query_key_cb_args { daos_handle_t th; uint32_t *max_delay; uint64_t *queue_id; + uint64_t send_time; }; static void @@ -2048,6 +2162,7 @@ obj_shard_query_key_cb(tse_task_t *task, void *data) D_SPIN_UNLOCK(&cb_args->obj->cob_spin); out: + obj_shard_update_metrics_end(rpc, cb_args->send_time, cb_args, ret == 0 ? rc : ret); crt_req_decref(rpc); if (ret == 0 || obj_retry_error(rc)) ret = rc; @@ -2101,6 +2216,8 @@ dc_obj_shard_query_key(struct dc_obj_shard *shard, struct dtx_epoch *epoch, uint cb_args.max_epoch = max_epoch; cb_args.queue_id = queue_id; cb_args.max_delay = max_delay; + cb_args.send_time = daos_get_ntime(); + obj_shard_update_metrics_begin(req); rc = tse_task_register_comp_cb(task, obj_shard_query_key_cb, &cb_args, sizeof(cb_args)); if (rc != 0) @@ -2147,6 +2264,7 @@ struct obj_shard_sync_cb_args { uint32_t *map_ver; uint32_t *max_delay; uint64_t *enqueue_id; + uint64_t send_time; }; static int @@ -2202,6 +2320,8 @@ obj_shard_sync_cb(tse_task_t *task, void *data) oso->oso_epoch, oso->oso_map_version); out: + obj_shard_update_metrics_end(rpc, cb_args->send_time, cb_args, rc); + crt_req_decref(rpc); return rc; } @@ -2248,7 +2368,8 @@ dc_obj_shard_sync(struct dc_obj_shard *shard, enum obj_rpc_opc opc, cb_args.map_ver = &args->sa_auxi.map_ver; cb_args.max_delay = &args->sa_auxi.obj_auxi->max_delay; cb_args.enqueue_id = &args->sa_auxi.enqueue_id; - + cb_args.send_time = daos_get_ntime(); + 
obj_shard_update_metrics_begin(req); rc = tse_task_register_comp_cb(task, obj_shard_sync_cb, &cb_args, sizeof(cb_args)); if (rc != 0) @@ -2284,8 +2405,9 @@ struct obj_k2a_args { struct dtx_epoch *epoch; daos_handle_t *th; daos_anchor_t *anchor; - uint32_t shard; uint64_t *enqueue_id; + uint64_t send_time; + uint32_t shard; uint32_t *max_delay; }; @@ -2353,6 +2475,8 @@ dc_k2a_cb(tse_task_t *task, void *arg) enum_anchor_copy(k2a_args->anchor, &oko->oko_anchor); dc_obj_shard2anchor(k2a_args->anchor, k2a_args->shard); out: + obj_shard_update_metrics_end(k2a_args->rpc, k2a_args->send_time, k2a_args, + ret == 0 ? rc : ret); if (k2a_args->eaa_obj != NULL) obj_shard_decref(k2a_args->eaa_obj); crt_req_decref(k2a_args->rpc); @@ -2429,6 +2553,8 @@ dc_obj_shard_key2anchor(struct dc_obj_shard *obj_shard, enum obj_rpc_opc opc, cb_args.shard = obj_shard->do_shard_idx; cb_args.enqueue_id = &args->ka_auxi.enqueue_id; cb_args.max_delay = &args->ka_auxi.obj_auxi->max_delay; + cb_args.send_time = daos_get_ntime(); + obj_shard_update_metrics_begin(req); rc = tse_task_register_comp_cb(task, dc_k2a_cb, &cb_args, sizeof(cb_args)); if (rc != 0) D_GOTO(out_eaa, rc); diff --git a/src/object/obj_internal.h b/src/object/obj_internal.h index 8a2b12fff55..d13015a7c27 100644 --- a/src/object/obj_internal.h +++ b/src/object/obj_internal.h @@ -22,6 +22,7 @@ #include #include #include +#include #include "obj_rpc.h" #include "obj_ec.h" @@ -539,6 +540,60 @@ struct dc_obj_verify_args { struct dc_obj_verify_cursor cursor; }; +/* + * Report latency on a per-I/O size. + * Buckets starts at [0; 256B[ and are increased by power of 2 + * (i.e. [256B; 512B[, [512B; 1KB[) up to [4MB; infinity[ + * Since 4MB = 2^22 and 256B = 2^8, this means + * (22 - 8 + 1) = 15 buckets plus the 4MB+ bucket, so + * 16 buckets in total. 
+ */ +#define NR_LATENCY_BUCKETS 16 + +struct dc_obj_tls { + /** Measure update/fetch latency based on I/O size (type = gauge) */ + struct d_tm_node_t *cot_update_lat[NR_LATENCY_BUCKETS]; + struct d_tm_node_t *cot_fetch_lat[NR_LATENCY_BUCKETS]; + + /** Measure per-operation latency in us (type = gauge) */ + struct d_tm_node_t *cot_op_lat[OBJ_PROTO_CLI_COUNT]; + /** Count number of per-opcode active requests (type = gauge) */ + struct d_tm_node_t *cot_op_active[OBJ_PROTO_CLI_COUNT]; +}; + +int +obj_latency_tm_init(uint32_t opc, int tgt_id, struct d_tm_node_t **tm, + char *op, char *desc, bool server); +extern struct daos_module_key dc_obj_module_key; + +static inline struct dc_obj_tls * +dc_obj_tls_get() +{ + struct daos_thread_local_storage *dtls; + + dtls = dc_tls_get(dc_obj_module_key.dmk_tags); + D_ASSERT(dtls != NULL); + return daos_module_key_get(dtls, &dc_obj_module_key); +} + +static inline unsigned int +lat_bucket(uint64_t size) +{ + int nr; + + if (size <= 256) + return 0; + + /** return number of leading zero-bits */ + nr = __builtin_clzl(size - 1); + + /** >4MB, return last bucket */ + if (nr < 42) + return NR_LATENCY_BUCKETS - 1; + + return 56 - nr; +} + static inline int dc_cont2uuid(struct dc_cont *dc_cont, uuid_t *hdl_uuid, uuid_t *uuid) { diff --git a/src/object/obj_utils.c b/src/object/obj_utils.c index 8312c6719d8..758ca0d8fac 100644 --- a/src/object/obj_utils.c +++ b/src/object/obj_utils.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2018-2022 Intel Corporation. + * (C) Copyright 2018-2023 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -10,6 +10,10 @@ #define DDSUBSYS DDFAC(object) #include +#include +#include +#include +#include #include "obj_internal.h" static daos_size_t @@ -86,6 +90,61 @@ daos_iods_free(daos_iod_t *iods, int nr, bool need_free) D_FREE(iods); } +int +obj_latency_tm_init(uint32_t opc, int tgt_id, struct d_tm_node_t **tm, char *op, + char *desc, bool server) +{ + unsigned int bucket_max = 256; + int i; + int rc = 0; + + for (i = 0; i < NR_LATENCY_BUCKETS; i++) { + char *path; + + if (server) { + if (bucket_max < 1024) /** B */ + D_ASPRINTF(path, "io/latency/%s/%uB/tgt_%u", + op, bucket_max, tgt_id); + else if (bucket_max < 1024 * 1024) /** KB */ + D_ASPRINTF(path, "io/latency/%s/%uKB/tgt_%u", + op, bucket_max / 1024, tgt_id); + else if (bucket_max <= 1024 * 1024 * 4) /** MB */ + D_ASPRINTF(path, "io/latency/%s/%uMB/tgt_%u", + op, bucket_max / (1024 * 1024), tgt_id); + else /** >4MB */ + D_ASPRINTF(path, "io/latency/%s/GT4MB/tgt_%u", + op, tgt_id); + } else { + pid_t pid = getpid(); + unsigned long tid = pthread_self(); + + if (bucket_max < 1024) /** B */ + D_ASPRINTF(path, "%s/%u/%lu/io/latency/%s/%uB", + dc_jobid, pid, tid, op, bucket_max); + else if (bucket_max < 1024 * 1024) /** KB */ + D_ASPRINTF(path, "%s/%u/%lu/io/latency/%s/%uKB", + dc_jobid, pid, tid, op, bucket_max / 1024); + else if (bucket_max <= 1024 * 1024 * 4) /** MB */ + D_ASPRINTF(path, "%s/%u/%lu/io/latency/%s/%uMB", + dc_jobid, pid, tid, op, bucket_max / (1024 * 1024)); + else /** >4MB */ + D_ASPRINTF(path, "%s/%u/%lu/io/latency/%s/GT4MB", + dc_jobid, pid, tid, op); + + } + rc = d_tm_add_metric(&tm[i], D_TM_STATS_GAUGE, desc, "us", path); + if (rc) + D_WARN("Failed to create per-I/O size latency " + "sensor: "DF_RC"\n", DP_RC(rc)); + D_FREE(path); + + bucket_max <<= 1; + } + + return rc; +} + + struct recx_rec { daos_recx_t *rr_recx; }; diff --git a/src/object/srv_internal.h b/src/object/srv_internal.h index 4452e040486..4ac391678b2 100644 --- 
a/src/object/srv_internal.h +++ b/src/object/srv_internal.h @@ -107,16 +107,6 @@ struct migrate_pool_tls { void migrate_pool_tls_destroy(struct migrate_pool_tls *tls); -/* - * Report latency on a per-I/O size. - * Buckets starts at [0; 256B[ and are increased by power of 2 - * (i.e. [256B; 512B[, [512B; 1KB[) up to [4MB; infinity[ - * Since 4MB = 2^22 and 256B = 2^8, this means - * (22 - 8 + 1) = 15 buckets plus the 4MB+ bucket, so - * 16 buckets in total. - */ -#define NR_LATENCY_BUCKETS 16 - struct obj_pool_metrics { /** Count number of total per-opcode requests (type = counter) */ struct d_tm_node_t *opm_total[OBJ_PROTO_CLI_COUNT]; @@ -168,24 +158,6 @@ obj_tls_get() return dss_module_key_get(dss_tls_get(), &obj_module_key); } -static inline unsigned int -lat_bucket(uint64_t size) -{ - int nr; - - if (size <= 256) - return 0; - - /** return number of leading zero-bits */ - nr = __builtin_clzl(size - 1); - - /** >4MB, return last bucket */ - if (nr < 42) - return NR_LATENCY_BUCKETS - 1; - - return 56 - nr; -} - enum latency_type { BULK_LATENCY, BIO_LATENCY, diff --git a/src/object/srv_mod.c b/src/object/srv_mod.c index 72a25ba97de..de3436513e6 100644 --- a/src/object/srv_mod.c +++ b/src/object/srv_mod.c @@ -77,41 +77,6 @@ static struct daos_rpc_handler obj_handlers_v10[] = { #undef X -static int -obj_latency_tm_init(uint32_t opc, int tgt_id, struct d_tm_node_t **tm, char *op, char *desc) -{ - unsigned int bucket_max = 256; - int i; - int rc = 0; - - for (i = 0; i < NR_LATENCY_BUCKETS; i++) { - char *path; - - if (bucket_max < 1024) /** B */ - D_ASPRINTF(path, "io/latency/%s/%uB/tgt_%u", - op, bucket_max, tgt_id); - else if (bucket_max < 1024 * 1024) /** KB */ - D_ASPRINTF(path, "io/latency/%s/%uKB/tgt_%u", - op, bucket_max / 1024, tgt_id); - else if (bucket_max <= 1024 * 1024 * 4) /** MB */ - D_ASPRINTF(path, "io/latency/%s/%uMB/tgt_%u", - op, bucket_max / (1024 * 1024), tgt_id); - else /** >4MB */ - D_ASPRINTF(path, "io/latency/%s/GT4MB/tgt_%u", - op, tgt_id); - 
- rc = d_tm_add_metric(&tm[i], D_TM_STATS_GAUGE, desc, "us", path); - if (rc) - D_WARN("Failed to create per-I/O size latency " - "sensor: "DF_RC"\n", DP_RC(rc)); - D_FREE(path); - - bucket_max <<= 1; - } - - return rc; -} - static void * obj_tls_init(int tags, int xs_id, int tgt_id) { @@ -162,27 +127,33 @@ obj_tls_init(int tags, int xs_id, int tgt_id) */ obj_latency_tm_init(DAOS_OBJ_RPC_UPDATE, tgt_id, tls->ot_update_lat, - obj_opc_to_str(DAOS_OBJ_RPC_UPDATE), "update RPC processing time"); + obj_opc_to_str(DAOS_OBJ_RPC_UPDATE), "update RPC processing time", + true); obj_latency_tm_init(DAOS_OBJ_RPC_FETCH, tgt_id, tls->ot_fetch_lat, - obj_opc_to_str(DAOS_OBJ_RPC_FETCH), "fetch RPC processing time"); + obj_opc_to_str(DAOS_OBJ_RPC_FETCH), "fetch RPC processing time", + true); obj_latency_tm_init(DAOS_OBJ_RPC_TGT_UPDATE, tgt_id, tls->ot_tgt_update_lat, obj_opc_to_str(DAOS_OBJ_RPC_TGT_UPDATE), - "update tgt RPC processing time"); + "update tgt RPC processing time", + true); obj_latency_tm_init(DAOS_OBJ_RPC_UPDATE, tgt_id, tls->ot_update_bulk_lat, - "bulk_update", "Bulk update processing time"); + "bulk_update", "Bulk update processing time", + true); obj_latency_tm_init(DAOS_OBJ_RPC_FETCH, tgt_id, tls->ot_fetch_bulk_lat, - "bulk_fetch", "Bulk fetch processing time"); + "bulk_fetch", "Bulk fetch processing time", + true); obj_latency_tm_init(DAOS_OBJ_RPC_UPDATE, tgt_id, tls->ot_update_vos_lat, - "vos_update", "VOS update processing time"); + "vos_update", "VOS update processing time", + true); obj_latency_tm_init(DAOS_OBJ_RPC_FETCH, tgt_id, tls->ot_fetch_vos_lat, - "vos_fetch", "VOS fetch processing time"); + "vos_fetch", "VOS fetch processing time", true); obj_latency_tm_init(DAOS_OBJ_RPC_UPDATE, tgt_id, tls->ot_update_bio_lat, - "bio_update", "BIO update processing time"); + "bio_update", "BIO update processing time", true); obj_latency_tm_init(DAOS_OBJ_RPC_FETCH, tgt_id, tls->ot_fetch_bio_lat, - "bio_fetch", "BIO fetch processing time"); + "bio_fetch", "BIO fetch 
processing time", true); return tls; } diff --git a/src/utils/daos_metrics/daos_metrics.c b/src/utils/daos_metrics/daos_metrics.c index b166b1a31f9..cbdaeb2fc84 100644 --- a/src/utils/daos_metrics/daos_metrics.c +++ b/src/utils/daos_metrics/daos_metrics.c @@ -53,29 +53,91 @@ print_usage(const char *prog_name) "\tInclude gauges\n" "--read, -r\n" "--reset, -e\n" + "\tReset metrics value to 0\n" + "--jobid, -j\n" + "\tDisplay metrics of the specified job\n" "\tInclude timestamp of when metric was read\n", prog_name); } -int -main(int argc, char **argv) +static int +process_metrics(int metric_id, char *dirname, int format, int filter, + int extra_descriptors, int delay, int num_iter, + d_tm_iter_cb_t iter_cb, void *arg) { struct d_tm_node_t *root = NULL; struct d_tm_node_t *node = NULL; struct d_tm_context *ctx = NULL; + int iteration = 0; + int rc = 0; + + ctx = d_tm_open(metric_id); + if (!ctx) + D_GOTO(out, rc = 0); + + root = d_tm_get_root(ctx); + if (!root) + D_GOTO(out, rc = -DER_NONEXIST); + + if (strncmp(dirname, "/", D_TM_MAX_NAME_LEN) != 0) { + node = d_tm_find_metric(ctx, dirname); + if (node != NULL) { + root = node; + } else { + printf("No metrics found at: '%s'\n", dirname); + D_GOTO(out, rc = 0); + } + } + + if (format == D_TM_CSV) + d_tm_print_field_descriptors(extra_descriptors, (FILE *)arg); + + while ((num_iter == 0) || (iteration < num_iter)) { + d_tm_iterate(ctx, root, 0, filter, NULL, format, extra_descriptors, + iter_cb, arg); + iteration++; + sleep(delay); + if (format == D_TM_STANDARD) + printf("\n\n"); + } + +out: + if (ctx != NULL) + d_tm_close(&ctx); + return rc; +} + +static void +iter_print(struct d_tm_context *ctx, struct d_tm_node_t *node, int level, + char *path, int format, int opt_fields, void *arg) +{ + d_tm_print_node(ctx, node, level, path, format, opt_fields, (FILE *)arg); +} + +static void +iter_reset(struct d_tm_context *ctx, struct d_tm_node_t *node, int level, + char *path, int format, int opt_fields, void *arg) +{ + 
d_tm_reset_node(ctx, node, level, path, format, opt_fields, (FILE *)arg); +} + +int +main(int argc, char **argv) +{ char dirname[D_TM_MAX_NAME_LEN] = {0}; + char jobid[D_TM_MAX_NAME_LEN] = {0}; bool show_meta = false; bool show_when_read = false; bool show_type = false; int srv_idx = 0; - int iteration = 0; int num_iter = 1; int filter = 0; int delay = 1; int format = D_TM_STANDARD; int opt; int extra_descriptors = 0; - uint32_t ops = 0; + d_tm_iter_cb_t iter_cb = NULL; + int rc; sprintf(dirname, "/"); @@ -97,11 +159,12 @@ main(int argc, char **argv) {"type", no_argument, NULL, 'T'}, {"read", no_argument, NULL, 'r'}, {"reset", no_argument, NULL, 'e'}, + {"jobid", required_argument, NULL, 'j'}, {"help", no_argument, NULL, 'h'}, {NULL, 0, NULL, 0} }; - opt = getopt_long_only(argc, argv, "S:cCdtsgi:p:D:MmTrhe", + opt = getopt_long_only(argc, argv, "S:cCdtsgi:p:D:MmTrj:he", long_options, NULL); if (opt == -1) break; @@ -150,7 +213,10 @@ main(int argc, char **argv) delay = atoi(optarg); break; case 'e': - ops |= D_TM_ITER_RESET; + iter_cb = iter_reset; + break; + case 'j': + snprintf(jobid, sizeof(jobid), "%s", optarg); break; case 'h': case '?': @@ -160,37 +226,13 @@ main(int argc, char **argv) } } - if (ops == 0) - ops |= D_TM_ITER_READ; + if (iter_cb == NULL) + iter_cb = iter_print; if (filter == 0) filter = D_TM_COUNTER | D_TM_DURATION | D_TM_TIMESTAMP | D_TM_MEMINFO | D_TM_TIMER_SNAPSHOT | D_TM_GAUGE | D_TM_STATS_GAUGE; - ctx = d_tm_open(srv_idx); - if (!ctx) - goto failure; - - root = d_tm_get_root(ctx); - if (!root) - goto failure; - - if (strncmp(dirname, "/", D_TM_MAX_NAME_LEN) != 0) { - node = d_tm_find_metric(ctx, dirname); - if (node != NULL) { - root = node; - } else { - printf("No metrics found at: '%s'\n", dirname); - exit(0); - } - } - - if (format == D_TM_CSV) - filter &= ~D_TM_DIRECTORY; - else - filter |= D_TM_DIRECTORY; - - if (show_when_read) extra_descriptors |= D_TM_INCLUDE_TIMESTAMP; if (show_meta) @@ -199,27 +241,23 @@ main(int argc, char 
**argv) extra_descriptors |= D_TM_INCLUDE_TYPE; if (format == D_TM_CSV) - d_tm_print_field_descriptors(extra_descriptors, stdout); + filter &= ~D_TM_DIRECTORY; + else + filter |= D_TM_DIRECTORY; - while ((num_iter == 0) || (iteration < num_iter)) { - d_tm_iterate(ctx, root, 0, filter, NULL, format, extra_descriptors, - ops, stdout); - iteration++; - sleep(delay); - if (format == D_TM_STANDARD) - printf("\n\n"); + if (strlen(jobid) > 0) { + srv_idx = DC_TM_JOB_ROOT_ID; + snprintf(dirname, sizeof(dirname), "%s", jobid); } - d_tm_close(&ctx); - return 0; - -failure: - printf("Unable to attach to the shared memory for the server index: %d" - "\nMake sure to run the I/O Engine with the same index to " - "initialize the shared memory and populate it with metrics.\n" - "Verify user/group settings match those that started the I/O " - "Engine.\n", - srv_idx); - d_tm_close(&ctx); - return -1; + /* fetch metrics from server side */ + rc = process_metrics(srv_idx, dirname, format, filter, extra_descriptors, + delay, num_iter, iter_cb, stdout); + if (rc) + printf("Unable to attach to the shared memory for the server index: %d" + "\nMake sure to run the I/O Engine with the same index to " + "initialize the shared memory and populate it with metrics.\n" + "Verify user/group settings match those that started the I/O " + "Engine.\n", srv_idx); + return rc != 0 ? 
-1 : 0; } From 13b44a215846f76ed44f276b7407008aae4b33ed Mon Sep 17 00:00:00 2001 From: Di Wang Date: Tue, 2 Jan 2024 23:55:08 +0000 Subject: [PATCH 02/18] DAOS-8331 pool: add per pool metrics Add per pool metrics for object module Required-githooks: true Signed-off-by: Di Wang --- src/client/api/init.c | 5 +- src/client/api/metrics.c | 3 +- src/common/SConscript | 2 +- src/common/metrics.c | 133 +++++++++++++++++++++++++++++ src/container/srv.c | 3 +- src/dtx/dtx_srv.c | 3 +- src/engine/module.c | 7 +- src/include/daos/metric.h | 19 ----- src/include/daos/metrics.h | 82 ++++++++++++++++++ src/include/daos/pool.h | 4 + src/include/daos/tls.h | 9 +- src/include/daos_srv/daos_engine.h | 21 +---- src/object/cli_mod.c | 32 ++++++- src/object/cli_shard.c | 31 ++++--- src/object/obj_internal.h | 26 +++++- src/object/obj_utils.c | 98 ++++++++++++++++++++- src/object/srv_internal.h | 22 +---- src/object/srv_mod.c | 96 +-------------------- src/pool/cli.c | 75 +++++++++++++++- src/pool/cli_internal.h | 2 + src/pool/srv.c | 3 +- src/pool/srv_metrics.c | 16 +--- src/vos/vos_common.c | 3 +- 23 files changed, 493 insertions(+), 202 deletions(-) create mode 100644 src/common/metrics.c delete mode 100644 src/include/daos/metric.h create mode 100644 src/include/daos/metrics.h diff --git a/src/client/api/init.c b/src/client/api/init.c index b357e2088da..88685217e41 100644 --- a/src/client/api/init.c +++ b/src/client/api/init.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2016-2023 Intel Corporation. + * (C) Copyright 2016-2024 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -23,7 +23,7 @@ #include #include #include -#include +#include #if BUILD_PIPELINE #include #endif @@ -316,6 +316,7 @@ daos_fini(void) D_GOTO(unlock, rc); } + daos_metrics_fini(); #if BUILD_PIPELINE dc_pipeline_fini(); #endif diff --git a/src/client/api/metrics.c b/src/client/api/metrics.c index abdb0d09fd1..f0015962642 100644 --- a/src/client/api/metrics.c +++ b/src/client/api/metrics.c @@ -1,5 +1,5 @@ /* - * (C) Copyright 2020-2023 Intel Corporation. + * (C) Copyright 2020-2024 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/src/common/SConscript b/src/common/SConscript index 432b72403e5..1d06acd61e0 100644 --- a/src/common/SConscript +++ b/src/common/SConscript @@ -9,7 +9,7 @@ COMMON_FILES = ['debug.c', 'mem.c', 'fail_loc.c', 'lru.c', 'dedup.c', 'profile.c', 'compression.c', 'compression_isal.c', 'compression_qat.c', 'multihash.c', 'multihash_isal.c', 'cipher.c', 'cipher_isal.c', 'qat.c', 'fault_domain.c', - 'policy.c', 'tls.c'] + 'policy.c', 'tls.c', 'metrics.c'] def build_daos_common(denv, client): diff --git a/src/common/metrics.c b/src/common/metrics.c new file mode 100644 index 00000000000..2ac5b2ad932 --- /dev/null +++ b/src/common/metrics.c @@ -0,0 +1,133 @@ +/** + * (C) Copyright 2016-2024 Intel Corporation. + * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ +/** + * It implements registration and initialization of per-module metrics for DAOS. 
+ */ +#include +#include +#include + +struct metrics_list { + struct daos_module_metrics *mm_metrics; + d_list_t mm_list; + uint32_t mm_id; +}; + +/* Track list of loaded modules */ +D_LIST_HEAD(metrics_mod_list); +pthread_mutex_t metrics_mod_list_lock = PTHREAD_MUTEX_INITIALIZER; + +int +daos_register_metrics(enum daos_module_tag tag, uint32_t id, + struct daos_module_metrics *metrics) +{ + struct metrics_list *ml; + + D_ALLOC_PTR(ml); + if (ml == NULL) + return -DER_NOMEM; + ml->mm_metrics = metrics; + ml->mm_id = id; + D_MUTEX_LOCK(&metrics_mod_list_lock); + d_list_add_tail(&ml->mm_list, &metrics_mod_list); + D_MUTEX_UNLOCK(&metrics_mod_list_lock); + + return 0; +} + +void +daos_metrics_fini(void) +{ + struct metrics_list *ml; + struct metrics_list *tmp; + + D_MUTEX_LOCK(&metrics_mod_list_lock); + d_list_for_each_entry_safe(ml, tmp, &metrics_mod_list, mm_list) { + d_list_del_init(&ml->mm_list); + D_FREE(ml); + } + D_MUTEX_UNLOCK(&metrics_mod_list_lock); +} + +void +daos_module_fini_metrics(enum dss_module_tag tag, void **metrics) +{ + struct metrics_list *ml; + + D_MUTEX_LOCK(&metrics_mod_list_lock); + d_list_for_each_entry(ml, &metrics_mod_list, mm_list) { + struct daos_module_metrics *met = ml->mm_metrics; + + if (met == NULL) + continue; + if ((met->dmm_tags & tag) == 0) + continue; + if (met->dmm_fini == NULL) + continue; + if (metrics[ml->mm_id] == NULL) + continue; + + met->dmm_fini(metrics[ml->mm_id]); + } + D_MUTEX_UNLOCK(&metrics_mod_list_lock); +} + +int +daos_module_init_metrics(enum dss_module_tag tag, void **metrics, + const char *path, int tgt_id) +{ + struct metrics_list *ml; + + D_MUTEX_LOCK(&metrics_mod_list_lock); + d_list_for_each_entry(ml, &metrics_mod_list, mm_list) { + struct daos_module_metrics *met = ml->mm_metrics; + + if (met == NULL) + continue; + if ((met->dmm_tags & tag) == 0) + continue; + if (met->dmm_init == NULL) + continue; + + metrics[ml->mm_id] = met->dmm_init(path, tgt_id); + if (metrics[ml->mm_id] == NULL) { + 
D_ERROR("failed to allocate per-pool metrics for module %u\n", ml->mm_id); + D_MUTEX_UNLOCK(&metrics_mod_list_lock); + daos_module_fini_metrics(tag, metrics); + return -DER_NOMEM; + } + } + D_MUTEX_UNLOCK(&metrics_mod_list_lock); + + return 0; +} + +/** + * Query all modules for the number of per-pool metrics they create. + * + * \return Total number of metrics for all modules + */ +int +daos_module_nr_pool_metrics(void) +{ + struct metrics_list *ml; + int total = 0; + + d_list_for_each_entry(ml, &metrics_mod_list, mm_list) { + struct daos_module_metrics *met = ml->mm_metrics; + + if (met == NULL) + continue; + if (met->dmm_nr_metrics == NULL) + continue; + if (!(met->dmm_tags & DAOS_CLI_TAG)) + continue; + + total += met->dmm_nr_metrics(); + } + + return total; +} diff --git a/src/container/srv.c b/src/container/srv.c index d48c8794076..54b3ff3f885 100644 --- a/src/container/srv.c +++ b/src/container/srv.c @@ -12,6 +12,7 @@ #define D_LOGFAC DD_FAC(container) #include +#include #include #include "rpc.h" #include "srv_internal.h" @@ -138,7 +139,7 @@ struct dss_module_key cont_module_key = { .dmk_fini = dsm_tls_fini, }; -struct dss_module_metrics cont_metrics = { +struct daos_module_metrics cont_metrics = { .dmm_tags = DAOS_SYS_TAG, .dmm_init = ds_cont_metrics_alloc, .dmm_fini = ds_cont_metrics_free, diff --git a/src/dtx/dtx_srv.c b/src/dtx/dtx_srv.c index 9cc5a518335..74cf744c3f5 100644 --- a/src/dtx/dtx_srv.c +++ b/src/dtx/dtx_srv.c @@ -9,6 +9,7 @@ #define D_LOGFAC DD_FAC(dtx) #include +#include #include #include #include @@ -128,7 +129,7 @@ dtx_metrics_count(void) return (sizeof(struct dtx_pool_metrics) / sizeof(struct d_tm_node_t *)); } -struct dss_module_metrics dtx_metrics = { +struct daos_module_metrics dtx_metrics = { .dmm_tags = DAOS_TGT_TAG, .dmm_init = dtx_metrics_alloc, .dmm_fini = dtx_metrics_free, diff --git a/src/engine/module.c b/src/engine/module.c index ce33609aeba..4ee74235ff5 100644 --- a/src/engine/module.c +++ b/src/engine/module.c @@ -14,6 
+14,7 @@ #include #include +#include #include #include #include "drpc_handler.h" @@ -387,7 +388,7 @@ dss_module_init_metrics(enum dss_module_tag tag, void **metrics, struct loaded_mod *mod; d_list_for_each_entry(mod, &loaded_mod_list, lm_lk) { - struct dss_module_metrics *met = mod->lm_dss_mod->sm_metrics; + struct daos_module_metrics *met = mod->lm_dss_mod->sm_metrics; if (met == NULL) continue; @@ -415,7 +416,7 @@ dss_module_fini_metrics(enum dss_module_tag tag, void **metrics) struct loaded_mod *mod; d_list_for_each_entry(mod, &loaded_mod_list, lm_lk) { - struct dss_module_metrics *met = mod->lm_dss_mod->sm_metrics; + struct daos_module_metrics *met = mod->lm_dss_mod->sm_metrics; if (met == NULL) continue; @@ -442,7 +443,7 @@ dss_module_nr_pool_metrics(void) int total = 0, nr; d_list_for_each_entry(mod, &loaded_mod_list, lm_lk) { - struct dss_module_metrics *met = mod->lm_dss_mod->sm_metrics; + struct daos_module_metrics *met = mod->lm_dss_mod->sm_metrics; if (met == NULL) continue; diff --git a/src/include/daos/metric.h b/src/include/daos/metric.h deleted file mode 100644 index 9417b52fdc9..00000000000 --- a/src/include/daos/metric.h +++ /dev/null @@ -1,19 +0,0 @@ -/* - * (C) Copyright 2020-2023 Intel Corporation. - * - * SPDX-License-Identifier: BSD-2-Clause-Patent - */ -#ifndef __DAOS_METRIC_H__ -#define __DAOS_METRIC_H__ - -/** - * Called during library initialization to init metrics. - */ -int dc_tm_init(void); - -/** - * Called during library finalization to free metrics resources - */ -void dc_tm_fini(void); - -#endif /* __DAOS_TM_H__ */ diff --git a/src/include/daos/metrics.h b/src/include/daos/metrics.h new file mode 100644 index 00000000000..797ef89fa4d --- /dev/null +++ b/src/include/daos/metrics.h @@ -0,0 +1,82 @@ +/** + * (C) Copyright 2016-2024 Intel Corporation. 
+ * + * SPDX-License-Identifier: BSD-2-Clause-Patent + */ +/** + * This file is part of daos + * + * src/include/daos/metrics.h + */ + +#ifndef __DAOS_METRICS_H__ +#define __DAOS_METRICS_H__ + +#include +#include +#include +#include + +/* For now TLS is only enabled if metrics are enabled */ +#define METRIC_DUMP_ENV "DAOS_METRIC_DUMP_ENV" +#define DAOS_CLIENT_METRICS_ENV "DAOS_CLIENT_METRICS" +#define DAOS_CLIENT_METRICS_RETAIN_ENV "DAOS_CLIENT_METRICS_RETAIN" +extern bool daos_client_metric; +extern bool daos_client_metric_retain; + +struct daos_module_metrics { + /* Indicate where the keys should be instantiated */ + enum daos_module_tag dmm_tags; + + /** + * allocate metrics with path to ephemeral shmem for the + * newly-created pool + */ + void *(*dmm_init)(const char *path, int tgt_id); + void (*dmm_fini)(void *data); + + /** + * Get the number of metrics allocated by this module in total (including all targets). + */ + int (*dmm_nr_metrics)(void); +}; + +/* Estimate of bytes per typical metric node */ +#define NODE_BYTES (sizeof(struct d_tm_node_t) + \ + sizeof(struct d_tm_metric_t) + \ + 64 /* buffer for metadata */) +/* Estimate of bytes per histogram bucket */ +#define BUCKET_BYTES (sizeof(struct d_tm_bucket_t) + NODE_BYTES) +/* + Estimate of bytes per metric. + This is a generous high-water mark assuming most metrics are not using + histograms. May need adjustment if the balance of metrics changes. +*/ +#define PER_METRIC_BYTES (NODE_BYTES + sizeof(struct d_tm_stats_t) + \ + sizeof(struct d_tm_histogram_t) + \ + BUCKET_BYTES) + +int +daos_register_metrics(enum daos_module_tag tag, uint32_t id, struct daos_module_metrics *metrics); +void +daos_metrics_fini(void); +int +daos_module_init_metrics(enum dss_module_tag tag, void **metrics, + const char *path, int tgt_id); +void +daos_module_fini_metrics(enum dss_module_tag tag, void **metrics); + +int +daos_module_nr_pool_metrics(void); + +/** + * Called during library initialization to init metrics. 
+ */ +int dc_tm_init(void); + +/** + * Called during library finalization to free metrics resources + */ +void dc_tm_fini(void); + +#endif /*__DAOS_METRICS_H__*/ diff --git a/src/include/daos/pool.h b/src/include/daos/pool.h index 370f626f1f0..52113adf534 100644 --- a/src/include/daos/pool.h +++ b/src/include/daos/pool.h @@ -14,9 +14,11 @@ #include #include +#include #include #include #include +#include #include #include @@ -93,6 +95,8 @@ struct dc_pool { pthread_rwlock_t dp_map_lock; struct pool_map *dp_map; tse_task_t *dp_map_task; + void *dp_metrics[DAOS_NR_MODULE]; + char dp_path[D_TM_MAX_NAME_LEN]; /* highest known pool map version */ uint32_t dp_map_version_known; uint32_t dp_disconnecting:1, diff --git a/src/include/daos/tls.h b/src/include/daos/tls.h index 446ff53c180..08ea38388b8 100644 --- a/src/include/daos/tls.h +++ b/src/include/daos/tls.h @@ -1,5 +1,5 @@ /** - * (C) Copyright 2016-2023 Intel Corporation. + * (C) Copyright 2016-2024 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -65,12 +65,7 @@ int ds_tls_key_create(void); int dc_tls_key_create(void); void ds_tls_key_delete(void); void dc_tls_key_delete(void); -/* For now TLS is only enabled if metrics are enabled */ -#define METRIC_DUMP_ENV "DAOS_METRIC_DUMP_ENV" -#define DAOS_CLIENT_METRICS_ENV "DAOS_CLIENT_METRICS" -#define DAOS_CLIENT_METRICS_RETAIN_ENV "DAOS_CLIENT_METRICS_RETAIN" -extern bool daos_client_metric; -extern bool daos_client_metric_retain; + struct daos_module_key* daos_get_module_key(int index); /** diff --git a/src/include/daos_srv/daos_engine.h b/src/include/daos_srv/daos_engine.h index 4aea3fa4dca..6007c4368e0 100644 --- a/src/include/daos_srv/daos_engine.h +++ b/src/include/daos_srv/daos_engine.h @@ -1,5 +1,5 @@ /** - * (C) Copyright 2016-2023 Intel Corporation. + * (C) Copyright 2016-2024 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -341,23 +341,6 @@ struct dss_module_ops { int srv_profile_stop(); int srv_profile_start(char *path, int avg); -struct dss_module_metrics { - /* Indicate where the keys should be instantiated */ - enum dss_module_tag dmm_tags; - - /** - * allocate metrics with path to ephemeral shmem for to the - * newly-created pool - */ - void *(*dmm_init)(const char *path, int tgt_id); - void (*dmm_fini)(void *data); - - /** - * Get the number of metrics allocated by this module in total (including all targets). - */ - int (*dmm_nr_metrics)(void); -}; - /** * Each module should provide a dss_module structure which defines the module * interface. The name of the allocated structure must be the library name @@ -403,7 +386,7 @@ struct dss_module { struct dss_module_ops *sm_mod_ops; /* Per-pool metrics (optional) */ - struct dss_module_metrics *sm_metrics; + struct daos_module_metrics *sm_metrics; }; /** diff --git a/src/object/cli_mod.c b/src/object/cli_mod.c index 54deafb3b61..a9db8e7bf2c 100644 --- a/src/object/cli_mod.c +++ b/src/object/cli_mod.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2016-2023 Intel Corporation. + * (C) Copyright 2016-2024 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -107,6 +108,26 @@ struct daos_module_key dc_obj_module_key = { .dmk_fini = dc_obj_tls_fini, }; +static void* +dc_obj_metrics_alloc(const char *path, int tgt_id) +{ + return obj_metrics_alloc_internal(path, tgt_id, false); +} + +static void +dc_obj_metrics_free(void *data) +{ + D_FREE(data); +} + +/* metrics per pool */ +struct daos_module_metrics dc_obj_metrics = { + .dmm_tags = DAOS_CLI_TAG, + .dmm_init = dc_obj_metrics_alloc, + .dmm_fini = dc_obj_metrics_free, + .dmm_nr_metrics = obj_metrics_count, +}; + /** * Initialize object interface */ @@ -117,8 +138,15 @@ dc_obj_init(void) int rc; d_getenv_bool(DAOS_CLIENT_METRICS_ENV, &daos_client_metric); - if (daos_client_metric) + if (daos_client_metric) { daos_register_key(&dc_obj_module_key); + rc = daos_register_metrics(DAOS_CLI_TAG, DAOS_OBJ_MODULE, + &dc_obj_metrics); + if (rc) { + DL_ERROR(rc, "register object failed"); + return rc; + } + } rc = obj_utils_init(); if (rc) diff --git a/src/object/cli_shard.c b/src/object/cli_shard.c index 6c28d83150e..b39be6b7279 100644 --- a/src/object/cli_shard.c +++ b/src/object/cli_shard.c @@ -1,5 +1,5 @@ /* - * (C) Copyright 2016-2023 Intel Corporation. + * (C) Copyright 2016-2024 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include "cli_csum.h" @@ -683,23 +684,21 @@ obj_shard_update_metrics_end(crt_rpc_t *rpc, uint64_t send_time, void *arg, int { struct dc_obj_tls *tls; struct rw_cb_args *rw_args; - struct obj_rw_in *orw; + struct dc_pool *pool; + struct obj_rw_in *orw; struct d_tm_node_t *lat = NULL; + struct obj_pool_metrics *opm = NULL; daos_size_t size; uint64_t time; int opc; - if (!daos_client_metric) + if (!daos_client_metric || ret != 0) return; - tls = dc_obj_tls_get(); D_ASSERT(tls != NULL); opc = opc_get(rpc->cr_opc); orw = crt_req_get(rpc); d_tm_dec_gauge(tls->cot_op_active[opc], 1); - - if (ret != 0) - return; /** * Measure latency of successful I/O only. * Use bit shift for performance and tolerate some inaccuracy. @@ -709,14 +708,20 @@ obj_shard_update_metrics_end(crt_rpc_t *rpc, uint64_t send_time, void *arg, int switch (opc) { case DAOS_OBJ_RPC_UPDATE: - rw_args = arg; - size = daos_sgls_packed_size(rw_args->rwaa_sgls, orw->orw_nr, NULL); - lat = tls->cot_update_lat[lat_bucket(size)]; - break; case DAOS_OBJ_RPC_FETCH: rw_args = arg; - size = obj_get_fetch_size(rw_args); - lat = tls->cot_fetch_lat[lat_bucket(size)]; + pool = rw_args->shard_args->auxi.obj_auxi->obj->cob_pool; + D_ASSERT(pool != NULL); + opm = pool->dp_metrics[DAOS_OBJ_MODULE]; + if (opc == DAOS_OBJ_RPC_UPDATE) { + size = daos_sgls_packed_size(rw_args->rwaa_sgls, orw->orw_nr, NULL); + d_tm_inc_counter(opm->opm_update_bytes, size); + lat = tls->cot_update_lat[lat_bucket(size)]; + } else { + size = obj_get_fetch_size(rw_args); + lat = tls->cot_fetch_lat[lat_bucket(size)]; + d_tm_inc_counter(opm->opm_fetch_bytes, size); + } break; default: lat = tls->cot_op_lat[opc]; diff --git a/src/object/obj_internal.h b/src/object/obj_internal.h index 4f905867986..bf2059914ef 100644 --- a/src/object/obj_internal.h +++ b/src/object/obj_internal.h @@ -1,5 +1,5 @@ /** - * (C) Copyright 2016-2023 
Intel Corporation. + * (C) Copyright 2016-2024 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -621,6 +621,30 @@ dc_obj_tls_get() return daos_module_key_get(dtls, &dc_obj_module_key); } +struct obj_pool_metrics { + /** Count number of total per-opcode requests (type = counter) */ + struct d_tm_node_t *opm_total[OBJ_PROTO_CLI_COUNT]; + /** Total number of bytes fetched (type = counter) */ + struct d_tm_node_t *opm_fetch_bytes; + /** Total number of bytes updated (type = counter) */ + struct d_tm_node_t *opm_update_bytes; + + /** Total number of silently restarted updates (type = counter) */ + struct d_tm_node_t *opm_update_restart; + /** Total number of resent update operations (type = counter) */ + struct d_tm_node_t *opm_update_resent; + /** Total number of retry update operations (type = counter) */ + struct d_tm_node_t *opm_update_retry; + /** Total number of EC full-stripe update operations (type = counter) */ + struct d_tm_node_t *opm_update_ec_full; + /** Total number of EC partial update operations (type = counter) */ + struct d_tm_node_t *opm_update_ec_partial; +}; + +void obj_metrics_free(void *data); +int obj_metrics_count(void); +void * obj_metrics_alloc_internal(const char *path, int tgt_id, bool server); + static inline unsigned int lat_bucket(uint64_t size) { diff --git a/src/object/obj_utils.c b/src/object/obj_utils.c index 10f97b69ea3..77ab850b1ab 100644 --- a/src/object/obj_utils.c +++ b/src/object/obj_utils.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2018-2023 Intel Corporation. + * (C) Copyright 2018-2024 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -144,6 +144,102 @@ obj_latency_tm_init(uint32_t opc, int tgt_id, struct d_tm_node_t **tm, char *op, return rc; } +void +obj_metrics_free(void *data) +{ + D_FREE(data); +} + +int +obj_metrics_count(void) +{ + return (sizeof(struct obj_pool_metrics) / sizeof(struct d_tm_node_t *)); +} + + +void * +obj_metrics_alloc_internal(const char *path, int tgt_id, bool server) +{ + struct obj_pool_metrics *metrics; + uint32_t opc; + int rc; + + D_ASSERT(tgt_id >= 0); + + D_ALLOC_PTR(metrics); + if (metrics == NULL) + return NULL; + + /** register different per-opcode counters */ + for (opc = 0; opc < OBJ_PROTO_CLI_COUNT; opc++) { + /** Then the total number of requests, of type counter */ + rc = d_tm_add_metric(&metrics->opm_total[opc], D_TM_COUNTER, + "total number of processed object RPCs", + "ops", "%s/ops/%s/%s%u", path, + obj_opc_to_str(opc), server ? "tgt_" : "", tgt_id); + if (rc) + D_WARN("Failed to create total counter: "DF_RC"\n", + DP_RC(rc)); + } + + /** Total number of silently restarted updates, of type counter */ + rc = d_tm_add_metric(&metrics->opm_update_restart, D_TM_COUNTER, + "total number of restarted update ops", "updates", + "%s/restarted/%s%u", path, server ? "tgt_" : "", tgt_id); + if (rc) + D_WARN("Failed to create restarted counter: "DF_RC"\n", + DP_RC(rc)); + + /** Total number of resent updates, of type counter */ + rc = d_tm_add_metric(&metrics->opm_update_resent, D_TM_COUNTER, + "total number of resent update RPCs", "updates", + "%s/resent/%s%u", path, server ? "tgt_" : "", tgt_id); + if (rc) + D_WARN("Failed to create resent counter: "DF_RC"\n", + DP_RC(rc)); + + /** Total number of retry updates locally, of type counter */ + rc = d_tm_add_metric(&metrics->opm_update_retry, D_TM_COUNTER, + "total number of retried update RPCs", "updates", + "%s/retry/%s%u", path, server ? 
"tgt_" : "", tgt_id); + if (rc) + D_WARN("Failed to create retry cnt sensor: "DF_RC"\n", DP_RC(rc)); + + /** Total bytes read */ + rc = d_tm_add_metric(&metrics->opm_fetch_bytes, D_TM_COUNTER, + "total number of bytes fetched/read", "bytes", + "%s/xferred/fetch/%s%u", path, server ? "tgt_" : "", tgt_id); + if (rc) + D_WARN("Failed to create bytes fetch counter: "DF_RC"\n", + DP_RC(rc)); + + /** Total bytes written */ + rc = d_tm_add_metric(&metrics->opm_update_bytes, D_TM_COUNTER, + "total number of bytes updated/written", "bytes", + "%s/xferred/update/%s%u", path, server ? "tgt_" : "", tgt_id); + if (rc) + D_WARN("Failed to create bytes update counter: "DF_RC"\n", + DP_RC(rc)); + + /** Total number of EC full-stripe update operations, of type counter */ + rc = d_tm_add_metric(&metrics->opm_update_ec_full, D_TM_COUNTER, + "total number of EC sull-stripe updates", "updates", + "%s/EC_update/full_stripe/%s%u", path, server ? "tgt_" : "", tgt_id); + if (rc) + D_WARN("Failed to create EC full stripe update counter: "DF_RC"\n", + DP_RC(rc)); + + /** Total number of EC partial update operations, of type counter */ + rc = d_tm_add_metric(&metrics->opm_update_ec_partial, D_TM_COUNTER, + "total number of EC sull-partial updates", "updates", + "%s/EC_update/partial/%s%u", path, server ? "tgt_" : "", tgt_id); + if (rc) + D_WARN("Failed to create EC partial update counter: "DF_RC"\n", + DP_RC(rc)); + + return metrics; +} + struct recx_rec { daos_recx_t *rr_recx; diff --git a/src/object/srv_internal.h b/src/object/srv_internal.h index a4953c78631..333dd03f06b 100644 --- a/src/object/srv_internal.h +++ b/src/object/srv_internal.h @@ -1,5 +1,5 @@ /** - * (C) Copyright 2016-2023 Intel Corporation. + * (C) Copyright 2016-2024 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -107,26 +107,6 @@ struct migrate_pool_tls { void migrate_pool_tls_destroy(struct migrate_pool_tls *tls); -struct obj_pool_metrics { - /** Count number of total per-opcode requests (type = counter) */ - struct d_tm_node_t *opm_total[OBJ_PROTO_CLI_COUNT]; - /** Total number of bytes fetched (type = counter) */ - struct d_tm_node_t *opm_fetch_bytes; - /** Total number of bytes updated (type = counter) */ - struct d_tm_node_t *opm_update_bytes; - - /** Total number of silently restarted updates (type = counter) */ - struct d_tm_node_t *opm_update_restart; - /** Total number of resent update operations (type = counter) */ - struct d_tm_node_t *opm_update_resent; - /** Total number of retry update operations (type = counter) */ - struct d_tm_node_t *opm_update_retry; - /** Total number of EC full-stripe update operations (type = counter) */ - struct d_tm_node_t *opm_update_ec_full; - /** Total number of EC partial update operations (type = counter) */ - struct d_tm_node_t *opm_update_ec_partial; -}; - struct obj_tls { d_sg_list_t ot_echo_sgl; d_list_t ot_pool_list; diff --git a/src/object/srv_mod.c b/src/object/srv_mod.c index 692848552a5..9c23662c308 100644 --- a/src/object/srv_mod.c +++ b/src/object/srv_mod.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2016-2023 Intel Corporation. + * (C) Copyright 2016-2024 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -12,6 +12,7 @@ #include #include #include +#include #include "obj_rpc.h" #include "srv_internal.h" @@ -434,99 +435,10 @@ static struct dss_module_ops ds_obj_mod_ops = { static void * obj_metrics_alloc(const char *path, int tgt_id) { - struct obj_pool_metrics *metrics; - uint32_t opc; - int rc; - - D_ASSERT(tgt_id >= 0); - - D_ALLOC_PTR(metrics); - if (metrics == NULL) - return NULL; - - /** register different per-opcode counters */ - for (opc = 0; opc < OBJ_PROTO_CLI_COUNT; opc++) { - /** Then the total number of requests, of type counter */ - rc = d_tm_add_metric(&metrics->opm_total[opc], D_TM_COUNTER, - "total number of processed object RPCs", - "ops", "%s/ops/%s/tgt_%u", path, - obj_opc_to_str(opc), tgt_id); - if (rc) - D_WARN("Failed to create total counter: "DF_RC"\n", - DP_RC(rc)); - } - - /** Total number of silently restarted updates, of type counter */ - rc = d_tm_add_metric(&metrics->opm_update_restart, D_TM_COUNTER, - "total number of restarted update ops", "updates", - "%s/restarted/tgt_%u", path, tgt_id); - if (rc) - D_WARN("Failed to create restarted counter: "DF_RC"\n", - DP_RC(rc)); - - /** Total number of resent updates, of type counter */ - rc = d_tm_add_metric(&metrics->opm_update_resent, D_TM_COUNTER, - "total number of resent update RPCs", "updates", - "%s/resent/tgt_%u", path, tgt_id); - if (rc) - D_WARN("Failed to create resent counter: "DF_RC"\n", - DP_RC(rc)); - - /** Total number of retry updates locally, of type counter */ - rc = d_tm_add_metric(&metrics->opm_update_retry, D_TM_COUNTER, - "total number of retried update RPCs", "updates", - "%s/retry/tgt_%u", path, tgt_id); - if (rc) - D_WARN("Failed to create retry cnt sensor: "DF_RC"\n", DP_RC(rc)); - - /** Total bytes read */ - rc = d_tm_add_metric(&metrics->opm_fetch_bytes, D_TM_COUNTER, - "total number of bytes fetched/read", "bytes", - "%s/xferred/fetch/tgt_%u", path, tgt_id); - if (rc) - D_WARN("Failed to create bytes fetch 
counter: "DF_RC"\n", - DP_RC(rc)); - - /** Total bytes written */ - rc = d_tm_add_metric(&metrics->opm_update_bytes, D_TM_COUNTER, - "total number of bytes updated/written", "bytes", - "%s/xferred/update/tgt_%u", path, tgt_id); - if (rc) - D_WARN("Failed to create bytes update counter: "DF_RC"\n", - DP_RC(rc)); - - /** Total number of EC full-stripe update operations, of type counter */ - rc = d_tm_add_metric(&metrics->opm_update_ec_full, D_TM_COUNTER, - "total number of EC sull-stripe updates", "updates", - "%s/EC_update/full_stripe/tgt_%u", path, tgt_id); - if (rc) - D_WARN("Failed to create EC full stripe update counter: "DF_RC"\n", - DP_RC(rc)); - - /** Total number of EC partial update operations, of type counter */ - rc = d_tm_add_metric(&metrics->opm_update_ec_partial, D_TM_COUNTER, - "total number of EC sull-partial updates", "updates", - "%s/EC_update/partial/tgt_%u", path, tgt_id); - if (rc) - D_WARN("Failed to create EC partial update counter: "DF_RC"\n", - DP_RC(rc)); - - return metrics; -} - -static void -obj_metrics_free(void *data) -{ - D_FREE(data); -} - -static int -obj_metrics_count(void) -{ - return (sizeof(struct obj_pool_metrics) / sizeof(struct d_tm_node_t *)); + return obj_metrics_alloc_internal(path, tgt_id, false); } -struct dss_module_metrics obj_metrics = { +struct daos_module_metrics obj_metrics = { .dmm_tags = DAOS_TGT_TAG, .dmm_init = obj_metrics_alloc, .dmm_fini = obj_metrics_free, diff --git a/src/pool/cli.c b/src/pool/cli.c index 5b5283f2573..add048e81da 100644 --- a/src/pool/cli.c +++ b/src/pool/cli.c @@ -1,5 +1,5 @@ /* - * (C) Copyright 2016-2023 Intel Corporation. + * (C) Copyright 2016-2024 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -15,9 +15,13 @@ #define D_LOGFAC DD_FAC(pool) #include +#include +#include #include #include #include +#include +#include #include #include #include @@ -107,6 +111,9 @@ pool_free(struct d_hlink *hlink) D_MUTEX_DESTROY(&pool->dp_client_lock); D_RWLOCK_DESTROY(&pool->dp_co_list_lock); + if (pool->dp_metrics != NULL) + dc_pool_metrics_stop(pool); + if (pool->dp_map != NULL) pool_map_decref(pool->dp_map); @@ -558,6 +565,68 @@ pool_connect_cp(tse_task_t *task, void *data) return rc; } +/** + * Destroy metrics for a specific pool. + * + * \param[in] pool pointer to dc_pool structure + */ +void +dc_pool_metrics_stop(struct dc_pool *pool) +{ + int rc; + + if (!daos_client_metric) + return; + + daos_module_fini_metrics(DAOS_CLI_TAG, pool->dp_metrics); + if (!daos_client_metric_retain) { + rc = d_tm_del_ephemeral_dir(pool->dp_path); + if (rc != 0) { + D_WARN(DF_UUID ": failed to remove pool metrics dir for pool: " + DF_RC"\n", DP_UUID(pool->dp_pool), DP_RC(rc)); + return; + } + } + + D_INFO(DF_UUID ": destroyed ds_pool metrics: %s\n", DP_UUID(pool->dp_pool), pool->dp_path); +} + +int +dc_pool_metrics_start(struct dc_pool *pool) +{ + int pid = getpid(); + size_t size = daos_module_nr_pool_metrics() * PER_METRIC_BYTES; + int rc; + + if (!daos_client_metric) + return 0; + + snprintf(pool->dp_path, sizeof(pool->dp_path), "%s/%u/pool/"DF_UUIDF, + dc_jobid, pid, DP_UUID(pool->dp_pool)); + + /** create new shmem space for per-pool metrics */ + rc = d_tm_add_ephemeral_dir(NULL, size, pool->dp_path); + if (rc != 0) { + D_WARN(DF_UUID ": failed to create metrics dir for pool: " + DF_RC "\n", DP_UUID(pool->dp_pool), DP_RC(rc)); + return rc; + } + + /* initialize metrics on the system xstream for each module */ + rc = daos_module_init_metrics(DAOS_CLI_TAG, pool->dp_metrics, + pool->dp_path, pid); + if (rc != 0) { + D_WARN(DF_UUID ": failed to initialize module metrics: " + DF_RC"\n", DP_UUID(pool->dp_pool), DP_RC(rc)); + 
dc_pool_metrics_stop(pool); + return rc; + } + + D_INFO(DF_UUID ": created metrics for pool %s\n", DP_UUID(pool->dp_pool), pool->dp_path); + + return 0; +} + /* allocate and initialize a dc_pool by label or uuid */ static int init_pool(const char *label, uuid_t uuid, uint64_t capas, const char *grp, @@ -582,6 +651,10 @@ init_pool(const char *label, uuid_t uuid, uint64_t capas, const char *grp, if (rc != 0) D_GOTO(err_pool, rc); + rc = dc_pool_metrics_start(pool); + if (rc != 0) + D_GOTO(err_pool, rc); + /** Agent configuration data from pool->dp_sys->sy_info */ /** sy_info.provider */ /** sy_info.interface */ diff --git a/src/pool/cli_internal.h b/src/pool/cli_internal.h index f8f965b4469..0c14d5a1888 100644 --- a/src/pool/cli_internal.h +++ b/src/pool/cli_internal.h @@ -15,5 +15,7 @@ void dc_pool_hdl_unlink(struct dc_pool *pool); struct dc_pool *dc_pool_alloc(unsigned int nr); int dc_pool_map_update(struct dc_pool *pool, struct pool_map *map, bool connect); +int dc_pool_metrics_start(struct dc_pool *pool); +void dc_pool_metrics_stop(struct dc_pool *pool); #endif /* __POOL_CLIENT_INTERNAL_H__ */ diff --git a/src/pool/srv.c b/src/pool/srv.c index c164ddef3a8..1b36283bcb8 100644 --- a/src/pool/srv.c +++ b/src/pool/srv.c @@ -13,6 +13,7 @@ #include #include +#include #include #include #include "rpc.h" @@ -170,7 +171,7 @@ struct dss_module_key pool_module_key = { .dmk_fini = pool_tls_fini, }; -struct dss_module_metrics pool_metrics = { +struct daos_module_metrics pool_metrics = { .dmm_tags = DAOS_SYS_TAG, .dmm_init = ds_pool_metrics_alloc, .dmm_fini = ds_pool_metrics_free, diff --git a/src/pool/srv_metrics.c b/src/pool/srv_metrics.c index 0ca5b494df1..d4a2ba6fcbf 100644 --- a/src/pool/srv_metrics.c +++ b/src/pool/srv_metrics.c @@ -8,24 +8,10 @@ #include "srv_internal.h" #include +#include #include -/* Estimate of bytes per typical metric node */ -#define NODE_BYTES (sizeof(struct d_tm_node_t) + \ - sizeof(struct d_tm_metric_t) + \ - 64 /* buffer for metadata */) -/* 
Estimate of bytes per histogram bucket */ -#define BUCKET_BYTES (sizeof(struct d_tm_bucket_t) + NODE_BYTES) -/* - Estimate of bytes per metric. - This is a generous high-water mark assuming most metrics are not using - histograms. May need adjustment if the balance of metrics changes. -*/ -#define PER_METRIC_BYTES (NODE_BYTES + sizeof(struct d_tm_stats_t) + \ - sizeof(struct d_tm_histogram_t) + \ - BUCKET_BYTES) - /** * Initializes the pool metrics */ diff --git a/src/vos/vos_common.c b/src/vos/vos_common.c index 10d433ff1d5..3b50e222708 100644 --- a/src/vos/vos_common.c +++ b/src/vos/vos_common.c @@ -13,6 +13,7 @@ #include #include +#include #include #include #include @@ -821,7 +822,7 @@ vos_metrics_alloc(const char *path, int tgt_id) return vp_metrics; } -struct dss_module_metrics vos_metrics = { +struct daos_module_metrics vos_metrics = { .dmm_tags = DAOS_TGT_TAG, .dmm_init = vos_metrics_alloc, .dmm_fini = vos_metrics_free, From 5dfad80d24c83c5711ea62e9217f1149fb0ab5a4 Mon Sep 17 00:00:00 2001 From: Di Wang Date: Wed, 3 Jan 2024 20:38:36 +0000 Subject: [PATCH 03/18] DAOS-8331 object: fix metrics format fix metrics format. 
Required-githooks: true Signed-off-by: Di Wang --- src/object/cli_mod.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/object/cli_mod.c b/src/object/cli_mod.c index a9db8e7bf2c..47b3cb68a95 100644 --- a/src/object/cli_mod.c +++ b/src/object/cli_mod.c @@ -44,7 +44,7 @@ dc_obj_tls_init(int tags, int xs_id, int pid) /** Start with number of active requests, of type gauge */ rc = d_tm_add_metric(&tls->cot_op_active[opc], D_TM_STATS_GAUGE, "number of active object RPCs", "ops", - "%s/%u/%lu/ops/%s/active", dc_jobid, pid, tid, + "%s/%u/%lu/io/ops/%s/active", dc_jobid, pid, tid, obj_opc_to_str(opc)); if (rc) { D_WARN("Failed to create active counter: "DF_RC"\n", DP_RC(rc)); @@ -60,7 +60,7 @@ dc_obj_tls_init(int tags, int xs_id, int pid) /** And finally the per-opcode latency, of type gauge */ rc = d_tm_add_metric(&tls->cot_op_lat[opc], D_TM_STATS_GAUGE, "object RPC processing time", "us", - "%s/%u/%lu/ops/%s/latency", dc_jobid, pid, tid, + "%s/%u/%lu/io/ops/%s/latency", dc_jobid, pid, tid, obj_opc_to_str(opc)); if (rc) { D_WARN("Failed to create latency sensor: "DF_RC"\n", DP_RC(rc)); From f33d9f624404b449a7f0e7969787cc9767c2a1d9 Mon Sep 17 00:00:00 2001 From: Di Wang Date: Wed, 3 Jan 2024 21:29:22 +0000 Subject: [PATCH 04/18] DAOS-8331 pool: fix building failure fix building failure Required-githooks: true Signed-off-by: Di Wang --- src/pool/cli.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/pool/cli.c b/src/pool/cli.c index add048e81da..2c152e7c7dc 100644 --- a/src/pool/cli.c +++ b/src/pool/cli.c @@ -111,8 +111,7 @@ pool_free(struct d_hlink *hlink) D_MUTEX_DESTROY(&pool->dp_client_lock); D_RWLOCK_DESTROY(&pool->dp_co_list_lock); - if (pool->dp_metrics != NULL) - dc_pool_metrics_stop(pool); + dc_pool_metrics_stop(pool); if (pool->dp_map != NULL) pool_map_decref(pool->dp_map); From 402eeb0ce4b283fe76216947b99711f836c69d18 Mon Sep 17 00:00:00 2001 From: Di Wang Date: Fri, 5 Jan 2024 03:12:54 +0000 Subject: [PATCH 05/18] 
DAOS-8331 client: destroy metrics if not retain Destroy metrics shmem if it is not retain, and a few fixes from Michael. Required-githooks: true Signed-off-by: Di Wang --- src/client/api/metrics.c | 13 ++- src/gurt/telemetry.c | 122 +++++++++++++++++++++----- src/include/daos/pool.h | 5 +- src/include/gurt/telemetry_producer.h | 1 + src/object/cli_shard.c | 1 + src/pool/cli.c | 25 +++++- 6 files changed, 136 insertions(+), 31 deletions(-) diff --git a/src/client/api/metrics.c b/src/client/api/metrics.c index f0015962642..e48d9483df0 100644 --- a/src/client/api/metrics.c +++ b/src/client/api/metrics.c @@ -52,10 +52,15 @@ dc_tm_init(void) return rc; } + D_INFO("INIT %s metrics\n", dc_jobid); + rc = d_tm_add_ephemeral_dir(NULL, MAX_IDS_SIZE(INIT_JOB_NUM), "%s", dc_jobid); + if (rc != 0 && rc != -DER_EXIST) { + DL_ERROR(rc, "add metric %s failed.\n", dc_jobid); + D_GOTO(out, rc); + } + pid = getpid(); D_INFO("INIT %s/%u metrics\n", dc_jobid, pid); - - /** create new shmem space for per-pool metrics */ rc = d_tm_add_ephemeral_dir(NULL, MAX_IDS_SIZE(INIT_JOB_NUM), "%s/%u", dc_jobid, pid); if (rc != 0) { @@ -140,6 +145,10 @@ dc_tm_fini() rc = d_tm_del_ephemeral_dir("%s/%d", dc_jobid, pid); if (rc != 0) DL_ERROR(rc, "delete tm directory %s/%d.", dc_jobid, pid); + + rc = d_tm_try_del_ephemeral_dir("%s", dc_jobid); + if (rc != 0) + DL_ERROR(rc, "delete tm directory %s/%d.", dc_jobid, pid); } D_INFO("delete pid %s/%u\n", dc_jobid, pid); diff --git a/src/gurt/telemetry.c b/src/gurt/telemetry.c index a1eb7ce810a..503242f19e8 100644 --- a/src/gurt/telemetry.c +++ b/src/gurt/telemetry.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2020-2023 Intel Corporation. + * (C) Copyright 2020-2024 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -854,6 +854,27 @@ d_tm_init(int id, uint64_t mem_size, int flags) return rc; } +/* Check if all children are invalid */ +static bool +is_node_empty(struct d_tm_node_t *node) +{ + struct d_tm_context *ctx = tm_shmem.ctx; + struct d_tm_shmem_hdr *shmem; + struct d_tm_node_t *child; + + shmem = get_shmem_for_key(ctx, node->dtn_shmem_key); + child = conv_ptr(shmem, node->dtn_child); + while (child != NULL && !is_cleared_link(ctx, child)) { + child = conv_ptr(shmem, child->dtn_sibling); + if (child->dtn_name != NULL) { + D_DEBUG(DB_TRACE, "still have child %s\n", child->dtn_name); + return false; + } + } + + return true; +} + /** * Releases resources claimed by init */ @@ -872,7 +893,7 @@ d_tm_fini(void) struct d_tm_node_t *root; root = d_tm_get_root(tm_shmem.ctx); - if (root->dtn_child != NULL) + if (!is_node_empty(root)) destroy_shmem = false; } @@ -2393,14 +2414,18 @@ int d_tm_add_metric(struct d_tm_node_t **node, int metric_type, char *desc, } static void -invalidate_link_node(struct d_tm_node_t *node) +invalidate_link_node(struct d_tm_shmem_hdr *parent, struct d_tm_node_t *node) { if (node == NULL || node->dtn_type != D_TM_LINK) return; node->dtn_name = NULL; - if (node->dtn_metric != NULL) - node->dtn_metric->dtm_data.value = 0; + if (node->dtn_metric != NULL) { + struct d_tm_metric_t *link_metric; + + link_metric = conv_ptr(parent, node->dtn_metric); + link_metric->dtm_data.value = 0; + } } static int @@ -2588,7 +2613,7 @@ d_tm_add_ephemeral_dir(struct d_tm_node_t **node, size_t size_bytes, return 0; fail_link: - invalidate_link_node(link_node); + invalidate_link_node(parent_shmem, link_node); fail_tracking: close_shmem_for_key(ctx, key, true); goto fail_unlock; /* shmem will be closed/destroyed already */ @@ -2681,11 +2706,34 @@ rm_ephemeral_dir(struct d_tm_context *ctx, struct d_tm_node_t *link) out_link: /* invalidate since the link node can't be deleted from parent */ - invalidate_link_node(link); + 
invalidate_link_node(parent_shmem, link); out: return rc; } +/** + * Remove the ephemeral directory at \a path while holding the producer lock. + * + * \param[in] path Full path of the directory link to remove + * \param[in] force If false, refuse to remove a directory that is_node_empty() reports as non-empty + * + * \return 0 Success + * -DER_BUSY Directory is not empty and \a force is false + * Other errors from d_tm_lock_shmem() or rm_ephemeral_dir() + */ +static int +try_del_ephemeral_dir(char *path, bool force) +{ + struct d_tm_context *ctx = tm_shmem.ctx; + struct d_tm_node_t *link; + int rc = 0; + + rc = d_tm_lock_shmem(); + if (unlikely(rc != 0)) { + D_ERROR("failed to get producer mutex\n"); + return rc; + } + + link = get_node(ctx, path); + /* is_node_empty() dereferences its argument, so skip it when the lookup found nothing */ + if (!force && link != NULL && !is_node_empty(link)) + D_GOTO(unlock, rc = -DER_BUSY); + + rc = rm_ephemeral_dir(ctx, link); + +unlock: + d_tm_unlock_shmem(); + return rc; +} /** * Deletes an ephemeral metrics directory from the metric tree. * @@ -2697,11 +2745,9 @@ rm_ephemeral_dir(struct d_tm_context *ctx, struct d_tm_node_t *link) int d_tm_del_ephemeral_dir(const char *fmt, ...) { - struct d_tm_context *ctx = tm_shmem.ctx; - struct d_tm_node_t *link; - va_list args; - char path[D_TM_MAX_NAME_LEN] = {0}; - int rc = 0; + va_list args; + char path[D_TM_MAX_NAME_LEN] = {0}; + int rc = 0; if (!is_initialized()) D_GOTO(out, rc = -DER_UNINIT); @@ -2717,16 +2763,46 @@ d_tm_del_ephemeral_dir(const char *fmt, ...) if (rc != 0) D_GOTO(out, rc); - rc = d_tm_lock_shmem(); - if (unlikely(rc != 0)) { - D_ERROR("failed to get producer mutex\n"); - D_GOTO(out, rc); + rc = try_del_ephemeral_dir(path, true); +out: + if (rc != 0) + D_ERROR("Failed to remove ephemeral dir: " DF_RC "\n", + DP_RC(rc)); + else + D_INFO("Removed ephemeral directory [%s]\n", path); + return rc; +} + +/** + * Deletes an ephemeral metrics directory from the metric tree, only if it is empty. + * + * \param[in] fmt Used to construct the path to be removed + * + * \return 0 Success + * -DER_INVAL Invalid input + */ +int +d_tm_try_del_ephemeral_dir(const char *fmt, ...) 
+{ + va_list args; + char path[D_TM_MAX_NAME_LEN] = {0}; + int rc = 0; + + if (!is_initialized()) + D_GOTO(out, rc = -DER_UNINIT); + + if (fmt == NULL || strnlen(fmt, D_TM_MAX_NAME_LEN) == 0) { + D_ERROR("telemetry root cannot be deleted\n"); + D_GOTO(out, rc = -DER_INVAL); } - link = get_node(ctx, path); - rc = rm_ephemeral_dir(ctx, link); + va_start(args, fmt); + rc = parse_path_fmt(path, sizeof(path), fmt, args); + va_end(args); + if (rc != 0) + D_GOTO(out, rc); - d_tm_unlock_shmem(); + rc = try_del_ephemeral_dir(path, false); out: if (rc != 0) D_ERROR("Failed to remove ephemeral dir: " DF_RC "\n", @@ -3613,8 +3689,8 @@ allocate_shared_memory(key_t key, size_t mem_size, D_INIT_LIST_HEAD(&header->sh_subregions); - D_DEBUG(DB_MEM, "Created shared memory region for key 0x%x, size=%lu\n", - key, mem_size); + D_DEBUG(DB_MEM, "Created shared memory region for key 0x%x, size=%lu header %p base %p free %p\n", + key, mem_size, header, (void *)header->sh_base_addr, (void *)header->sh_free_addr); *shmem = header; @@ -3719,8 +3795,8 @@ shmalloc(struct d_tm_shmem_hdr *shmem, int length) shmem->sh_bytes_free -= length; shmem->sh_free_addr += length; D_DEBUG(DB_TRACE, - "Allocated %d bytes. Now %" PRIu64 " remain\n", - length, shmem->sh_bytes_free); + "Allocated %d bytes. Now %" PRIu64 " remain %p/%p\n", + length, shmem->sh_bytes_free, shmem, new_mem); memset(conv_ptr(shmem, new_mem), 0, length); return new_mem; } diff --git a/src/include/daos/pool.h b/src/include/daos/pool.h index 52113adf534..5b16b8bb2b1 100644 --- a/src/include/daos/pool.h +++ b/src/include/daos/pool.h @@ -1,5 +1,5 @@ /** - * (C) Copyright 2016-2023 Intel Corporation. + * (C) Copyright 2016-2024 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -101,7 +101,8 @@ struct dc_pool { uint32_t dp_map_version_known; uint32_t dp_disconnecting:1, dp_slave:1, /* generated via g2l */ - dp_rf_valid:1; + dp_rf_valid:1, + dp_metrics_init:1; /* required/allocated pool map size */ size_t dp_map_sz; diff --git a/src/include/gurt/telemetry_producer.h b/src/include/gurt/telemetry_producer.h index 5cd323637d4..0b0bdf88629 100644 --- a/src/include/gurt/telemetry_producer.h +++ b/src/include/gurt/telemetry_producer.h @@ -29,5 +29,6 @@ int d_tm_add_metric(struct d_tm_node_t **node, int metric_type, char *desc, int d_tm_add_ephemeral_dir(struct d_tm_node_t **node, size_t size_bytes, const char *fmt, ...); int d_tm_del_ephemeral_dir(const char *fmt, ...); +int d_tm_try_del_ephemeral_dir(const char *fmt, ...); void d_tm_fini(void); #endif /* __TELEMETRY_PRODUCER_H__ */ diff --git a/src/object/cli_shard.c b/src/object/cli_shard.c index b39be6b7279..969e1748bb1 100644 --- a/src/object/cli_shard.c +++ b/src/object/cli_shard.c @@ -713,6 +713,7 @@ obj_shard_update_metrics_end(crt_rpc_t *rpc, uint64_t send_time, void *arg, int pool = rw_args->shard_args->auxi.obj_auxi->obj->cob_pool; D_ASSERT(pool != NULL); opm = pool->dp_metrics[DAOS_OBJ_MODULE]; + D_ASSERTF(opm != NULL, "pool %p\n", pool); if (opc == DAOS_OBJ_RPC_UPDATE) { size = daos_sgls_packed_size(rw_args->rwaa_sgls, orw->orw_nr, NULL); d_tm_inc_counter(opm->opm_update_bytes, size); diff --git a/src/pool/cli.c b/src/pool/cli.c index 2c152e7c7dc..8cd6480a06b 100644 --- a/src/pool/cli.c +++ b/src/pool/cli.c @@ -577,6 +577,9 @@ dc_pool_metrics_stop(struct dc_pool *pool) if (!daos_client_metric) return; + if (!pool->dp_metrics_init) + return; + daos_module_fini_metrics(DAOS_CLI_TAG, pool->dp_metrics); if (!daos_client_metric_retain) { rc = d_tm_del_ephemeral_dir(pool->dp_path); @@ -600,6 +603,13 @@ dc_pool_metrics_start(struct dc_pool *pool) if (!daos_client_metric) return 0; + D_MUTEX_LOCK(&pool->dp_client_lock); + if 
(pool->dp_metrics_init) { + D_MUTEX_UNLOCK(&pool->dp_client_lock); + return 0; + } + D_MUTEX_UNLOCK(&pool->dp_client_lock); + snprintf(pool->dp_path, sizeof(pool->dp_path), "%s/%u/pool/"DF_UUIDF, dc_jobid, pid, DP_UUID(pool->dp_pool)); @@ -621,6 +631,9 @@ dc_pool_metrics_start(struct dc_pool *pool) return rc; } + D_MUTEX_LOCK(&pool->dp_client_lock); + pool->dp_metrics_init = 1; + D_MUTEX_UNLOCK(&pool->dp_client_lock); D_INFO(DF_UUID ": created metrics for pool %s\n", DP_UUID(pool->dp_pool), pool->dp_path); return 0; @@ -650,10 +663,6 @@ init_pool(const char *label, uuid_t uuid, uint64_t capas, const char *grp, if (rc != 0) D_GOTO(err_pool, rc); - rc = dc_pool_metrics_start(pool); - if (rc != 0) - D_GOTO(err_pool, rc); - /** Agent configuration data from pool->dp_sys->sy_info */ /** sy_info.provider */ /** sy_info.interface */ @@ -698,6 +707,10 @@ dc_pool_connect_internal(tse_task_t *task, daos_pool_info_t *info, const char *l goto out; } + rc = dc_pool_metrics_start(pool); + if (rc != 0) + D_GOTO(out, rc); + /** Pool connect RPC by UUID (provided, or looked up by label above) */ rc = pool_req_create(daos_task2ctx(task), &ep, POOL_CONNECT, pool->dp_pool, pool->dp_pool_hdl, &tpriv->rq_time, &rpc); @@ -1191,6 +1204,10 @@ dc_pool_g2l(struct dc_pool_glob *pool_glob, size_t len, daos_handle_t *poh) if (rc < 0) goto out; + rc = dc_pool_metrics_start(pool); + if (rc != 0) + goto out; + rc = pool_map_create(map_buf, pool_glob->dpg_map_version, &map); if (rc != 0) { D_ERROR("failed to create local pool map: "DF_RC"\n", From b938c3e4358135725edef08a847c4121d208cc55 Mon Sep 17 00:00:00 2001 From: Di Wang Date: Fri, 5 Jan 2024 03:50:22 +0000 Subject: [PATCH 06/18] DAOS-8331 gurt: fix memory corruption fix memory corruption Required-githooks: true Signed-off-by: Di Wang --- src/gurt/telemetry.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/gurt/telemetry.c b/src/gurt/telemetry.c index 503242f19e8..dcbe34e5d1d 100644 --- a/src/gurt/telemetry.c +++ 
b/src/gurt/telemetry.c @@ -866,10 +866,8 @@ is_node_empty(struct d_tm_node_t *node) child = conv_ptr(shmem, node->dtn_child); while (child != NULL && !is_cleared_link(ctx, child)) { child = conv_ptr(shmem, child->dtn_sibling); - if (child->dtn_name != NULL) { - D_DEBUG(DB_TRACE, "still have child %s\n", child->dtn_name); + if (child->dtn_name != NULL) return false; - } } return true; From 0b45f82f89ffc4b02ce10086fea221ef5c9c03a3 Mon Sep 17 00:00:00 2001 From: Di Wang Date: Sat, 6 Jan 2024 04:02:05 +0000 Subject: [PATCH 07/18] DAOS-8331 pool: Make per pool metrices independent Move per pool metrics outside of dc_pool, and make then independent, and each dc_pool will attach these metrics after connection. Required-githooks: true Signed-off-by: Di Wang --- src/include/daos/pool.h | 6 +- src/pool/cli.c | 285 +++++++++++++++++++++++++++++----------- src/pool/cli_internal.h | 20 ++- 3 files changed, 227 insertions(+), 84 deletions(-) diff --git a/src/include/daos/pool.h b/src/include/daos/pool.h index 5b16b8bb2b1..a8679da1240 100644 --- a/src/include/daos/pool.h +++ b/src/include/daos/pool.h @@ -95,14 +95,12 @@ struct dc_pool { pthread_rwlock_t dp_map_lock; struct pool_map *dp_map; tse_task_t *dp_map_task; - void *dp_metrics[DAOS_NR_MODULE]; - char dp_path[D_TM_MAX_NAME_LEN]; + void **dp_metrics; /* highest known pool map version */ uint32_t dp_map_version_known; uint32_t dp_disconnecting:1, dp_slave:1, /* generated via g2l */ - dp_rf_valid:1, - dp_metrics_init:1; + dp_rf_valid:1; /* required/allocated pool map size */ size_t dp_map_sz; diff --git a/src/pool/cli.c b/src/pool/cli.c index 8cd6480a06b..388a8f6d0d3 100644 --- a/src/pool/cli.c +++ b/src/pool/cli.c @@ -43,6 +43,153 @@ struct pool_task_priv { struct pool_update_state *state; /* (pool_update_internal) */ }; +struct dc_pool_metrics { + d_list_t dp_pool_list; /* pool metrics list on this thread */ + uuid_t dp_uuid; + char dp_path[D_TM_MAX_NAME_LEN]; + void *dp_metrics[DAOS_NR_MODULE]; + int dp_ref; +}; + +/** + * 
Destroy metrics for a specific pool. The caller still owns \a metrics and must free it. + * + * \param[in] metrics pointer to the pool's dc_pool_metrics structure + */ +static void +dc_pool_metrics_free(struct dc_pool_metrics *metrics) +{ + int rc; + + if (!daos_client_metric) + return; + + daos_module_fini_metrics(DAOS_CLI_TAG, metrics->dp_metrics); + if (!daos_client_metric_retain) { + /* dp_path is data, not a format string */ + rc = d_tm_del_ephemeral_dir("%s", metrics->dp_path); + if (rc != 0) { + D_WARN(DF_UUID ": failed to remove pool metrics dir for pool: " + DF_RC"\n", DP_UUID(metrics->dp_uuid), DP_RC(rc)); + return; + } + } + + D_INFO(DF_UUID ": destroyed ds_pool metrics: %s\n", DP_UUID(metrics->dp_uuid), + metrics->dp_path); +} + +/** + * Allocate per-pool client metrics and create their telemetry directory. + * + * \param[in] pool_uuid UUID of the pool + * \param[out] metrics_p Newly allocated metrics; not written when client + * metrics are disabled or on failure + * + * \return 0 on success or when client metrics are disabled, + * negative DER error otherwise + */ +static int +dc_pool_metrics_alloc(uuid_t pool_uuid, struct dc_pool_metrics **metrics_p) +{ + struct dc_pool_metrics *metrics = NULL; + int pid; + size_t size; + int rc; + + if (!daos_client_metric) + return 0; + + D_ALLOC_PTR(metrics); + if (metrics == NULL) + return -DER_NOMEM; + + uuid_copy(metrics->dp_uuid, pool_uuid); + pid = getpid(); + snprintf(metrics->dp_path, sizeof(metrics->dp_path), "%s/%u/pool/"DF_UUIDF, + dc_jobid, pid, DP_UUID(metrics->dp_uuid)); + + /** create new shmem space for per-pool metrics */ + size = daos_module_nr_pool_metrics() * PER_METRIC_BYTES; + /* dp_path is data, not a format string */ + rc = d_tm_add_ephemeral_dir(NULL, size, "%s", metrics->dp_path); + if (rc != 0) { + D_WARN(DF_UUID ": failed to create metrics dir for pool: " + DF_RC "\n", DP_UUID(metrics->dp_uuid), DP_RC(rc)); + D_FREE(metrics); + return rc; + } + + /* initialize metrics on the system xstream for each module */ + rc = daos_module_init_metrics(DAOS_CLI_TAG, metrics->dp_metrics, + metrics->dp_path, pid); + if (rc != 0) { + D_WARN(DF_UUID ": failed to initialize module metrics: " + DF_RC"\n", DP_UUID(metrics->dp_uuid), DP_RC(rc)); + /* tears down the telemetry state only; the struct is released below */ + dc_pool_metrics_free(metrics); + D_FREE(metrics); + return rc; + } + + D_INFO(DF_UUID ": created metrics for pool %s\n", DP_UUID(metrics->dp_uuid), + metrics->dp_path); + *metrics_p = metrics; + + return 0; +} + +struct dc_pool_metrics* +dc_pool_metrics_lookup(struct dc_pool_tls *tls, uuid_t pool_uuid) +{ + 
struct dc_pool_metrics *metrics; + + D_MUTEX_LOCK(&tls->dpc_metrics_list_lock); + d_list_for_each_entry(metrics, &tls->dpc_metrics_list, dp_pool_list) { + if (uuid_compare(pool_uuid, metrics->dp_uuid) == 0) { + D_MUTEX_UNLOCK(&tls->dpc_metrics_list_lock); + return metrics; + } + } + D_MUTEX_UNLOCK(&tls->dpc_metrics_list_lock); + + return NULL; +} + +static void* +dc_pool_tls_init(int tags, int xs_id, int pid) +{ + struct dc_pool_tls *tls; + int rc; + + D_ALLOC_PTR(tls); + if (tls == NULL) + return NULL; + + rc = D_MUTEX_INIT(&tls->dpc_metrics_list_lock, NULL); + if (rc != 0) { + D_FREE(tls); + return NULL; + } + + D_INIT_LIST_HEAD(&tls->dpc_metrics_list); + return tls; +} + +static void +dc_pool_tls_fini(int tags, void *data) +{ + struct dc_pool_tls *tls = data; + struct dc_pool_metrics *dpm; + struct dc_pool_metrics *tmp; + + D_MUTEX_LOCK(&tls->dpc_metrics_list_lock); + d_list_for_each_entry_safe(dpm, tmp, &tls->dpc_metrics_list, dp_pool_list) { + if (dpm->dp_ref != 0) + D_WARN("still reference for pool "DF_UUID" metrics\n", + DP_UUID(dpm->dp_uuid)); + d_list_del_init(&dpm->dp_pool_list); + dc_pool_metrics_free(dpm); + D_FREE(dpm); + } + D_MUTEX_UNLOCK(&tls->dpc_metrics_list_lock); + + D_MUTEX_DESTROY(&tls->dpc_metrics_list_lock); + D_FREE(tls); +} + +struct daos_module_key dc_pool_module_key = { + .dmk_tags = DAOS_CLI_TAG, + .dmk_index = -1, + .dmk_init = dc_pool_tls_init, + .dmk_fini = dc_pool_tls_fini, +}; + /** * Initialize pool interface */ @@ -52,6 +199,10 @@ dc_pool_init(void) uint32_t ver_array[2] = {DAOS_POOL_VERSION - 1, DAOS_POOL_VERSION}; int rc; + d_getenv_bool(DAOS_CLIENT_METRICS_ENV, &daos_client_metric); + if (daos_client_metric) + daos_register_key(&dc_pool_module_key); + dc_pool_proto_version = 0; rc = daos_rpc_proto_query(pool_proto_fmt_v5.cpf_base, ver_array, 2, &dc_pool_proto_version); if (rc) @@ -93,6 +244,61 @@ dc_pool_fini(void) } if (rc != 0) DL_ERROR(rc, "failed to unregister pool RPCs"); + + if (daos_client_metric) + 
daos_unregister_key(&dc_pool_module_key); +} + +static int +dc_pool_metrics_start(struct dc_pool *pool) +{ + struct dc_pool_tls *tls; + struct dc_pool_metrics *metrics; + int rc; + + if (pool->dp_metrics != NULL) + return 0; + + tls = dc_pool_tls_get(); + D_ASSERT(tls != NULL); + + metrics = dc_pool_metrics_lookup(tls, pool->dp_pool); + if (metrics != NULL) { + metrics->dp_ref++; + pool->dp_metrics = metrics->dp_metrics; + return 0; + } + + rc = dc_pool_metrics_alloc(pool->dp_pool, &metrics); + if (rc != 0) + return rc; + + D_MUTEX_LOCK(&tls->dpc_metrics_list_lock); + d_list_add(&metrics->dp_pool_list, &tls->dpc_metrics_list); + D_MUTEX_UNLOCK(&tls->dpc_metrics_list_lock); + metrics->dp_ref++; + pool->dp_metrics = metrics->dp_metrics; + + return 0; +} + +static void +dc_pool_metrics_stop(struct dc_pool *pool) +{ + struct dc_pool_metrics *metrics; + struct dc_pool_tls *tls; + + if (pool->dp_metrics == NULL) + return; + + tls = dc_pool_tls_get(); + D_ASSERT(tls != NULL); + + metrics = dc_pool_metrics_lookup(tls, pool->dp_pool); + if (metrics != NULL) + metrics->dp_ref--; + + pool->dp_metrics = NULL; } static void @@ -111,11 +317,11 @@ pool_free(struct d_hlink *hlink) D_MUTEX_DESTROY(&pool->dp_client_lock); D_RWLOCK_DESTROY(&pool->dp_co_list_lock); - dc_pool_metrics_stop(pool); - if (pool->dp_map != NULL) pool_map_decref(pool->dp_map); + dc_pool_metrics_stop(pool); + rsvc_client_fini(&pool->dp_client); if (pool->dp_sys != NULL) dc_mgmt_sys_detach(pool->dp_sys); @@ -564,81 +770,6 @@ pool_connect_cp(tse_task_t *task, void *data) return rc; } -/** - * Destroy metrics for a specific pool. 
- * - * \param[in] pool pointer to ds_pool structure - */ -void -dc_pool_metrics_stop(struct dc_pool *pool) -{ - int rc; - - if (!daos_client_metric) - return; - - if (!pool->dp_metrics_init) - return; - - daos_module_fini_metrics(DAOS_CLI_TAG, pool->dp_metrics); - if (!daos_client_metric_retain) { - rc = d_tm_del_ephemeral_dir(pool->dp_path); - if (rc != 0) { - D_WARN(DF_UUID ": failed to remove pool metrics dir for pool: " - DF_RC"\n", DP_UUID(pool->dp_pool), DP_RC(rc)); - return; - } - } - - D_INFO(DF_UUID ": destroyed ds_pool metrics: %s\n", DP_UUID(pool->dp_pool), pool->dp_path); -} - -int -dc_pool_metrics_start(struct dc_pool *pool) -{ - int pid = getpid(); - size_t size = daos_module_nr_pool_metrics() * PER_METRIC_BYTES; - int rc; - - if (!daos_client_metric) - return 0; - - D_MUTEX_LOCK(&pool->dp_client_lock); - if (pool->dp_metrics_init) { - D_MUTEX_UNLOCK(&pool->dp_client_lock); - return 0; - } - D_MUTEX_UNLOCK(&pool->dp_client_lock); - - snprintf(pool->dp_path, sizeof(pool->dp_path), "%s/%u/pool/"DF_UUIDF, - dc_jobid, pid, DP_UUID(pool->dp_pool)); - - /** create new shmem space for per-pool metrics */ - rc = d_tm_add_ephemeral_dir(NULL, size, pool->dp_path); - if (rc != 0) { - D_WARN(DF_UUID ": failed to create metrics dir for pool: " - DF_RC "\n", DP_UUID(pool->dp_pool), DP_RC(rc)); - return rc; - } - - /* initialize metrics on the system xstream for each module */ - rc = daos_module_init_metrics(DAOS_CLI_TAG, pool->dp_metrics, - pool->dp_path, pid); - if (rc != 0) { - D_WARN(DF_UUID ": failed to initialize module metrics: " - DF_RC"\n", DP_UUID(pool->dp_pool), DP_RC(rc)); - dc_pool_metrics_stop(pool); - return rc; - } - - D_MUTEX_LOCK(&pool->dp_client_lock); - pool->dp_metrics_init = 1; - D_MUTEX_UNLOCK(&pool->dp_client_lock); - D_INFO(DF_UUID ": created metrics for pool %s\n", DP_UUID(pool->dp_pool), pool->dp_path); - - return 0; -} - /* allocate and initialize a dc_pool by label or uuid */ static int init_pool(const char *label, uuid_t uuid, uint64_t 
capas, const char *grp, diff --git a/src/pool/cli_internal.h b/src/pool/cli_internal.h index 0c14d5a1888..7234599b976 100644 --- a/src/pool/cli_internal.h +++ b/src/pool/cli_internal.h @@ -1,5 +1,5 @@ /** - * (C) Copyright 2016-2022 Intel Corporation. + * (C) Copyright 2016-2024 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -15,7 +15,21 @@ void dc_pool_hdl_unlink(struct dc_pool *pool); struct dc_pool *dc_pool_alloc(unsigned int nr); int dc_pool_map_update(struct dc_pool *pool, struct pool_map *map, bool connect); -int dc_pool_metrics_start(struct dc_pool *pool); -void dc_pool_metrics_stop(struct dc_pool *pool); +struct dc_pool_tls { + pthread_mutex_t dpc_metrics_list_lock; + d_list_t dpc_metrics_list; +}; + +extern struct daos_module_key dc_pool_module_key; + +static inline struct dc_pool_tls * +dc_pool_tls_get() +{ + struct daos_thread_local_storage *dtls; + + dtls = dc_tls_get(dc_pool_module_key.dmk_tags); + D_ASSERT(dtls != NULL); + return daos_module_key_get(dtls, &dc_pool_module_key); +} #endif /* __POOL_CLIENT_INTERNAL_H__ */ From 7910300446fd84bfa71b5a0d0fc3ed07920c469b Mon Sep 17 00:00:00 2001 From: Di Wang Date: Sat, 6 Jan 2024 04:49:46 +0000 Subject: [PATCH 08/18] DAOS-8331 pool: missing metrics enable checking Missing metrics enable checking. 
Required-githooks: true Signed-off-by: Di Wang --- src/pool/cli.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/pool/cli.c b/src/pool/cli.c index 388a8f6d0d3..ac8798b5e44 100644 --- a/src/pool/cli.c +++ b/src/pool/cli.c @@ -256,6 +256,9 @@ dc_pool_metrics_start(struct dc_pool *pool) struct dc_pool_metrics *metrics; int rc; + if (!daos_client_metric) + return 0; + if (pool->dp_metrics != NULL) return 0; @@ -288,6 +291,9 @@ dc_pool_metrics_stop(struct dc_pool *pool) struct dc_pool_metrics *metrics; struct dc_pool_tls *tls; + if (!daos_client_metric) + return; + if (pool->dp_metrics == NULL) return; From 8182b70a22a0fae570c438ae3cb29cd7c3940d56 Mon Sep 17 00:00:00 2001 From: Di Wang Date: Mon, 8 Jan 2024 19:38:35 +0000 Subject: [PATCH 09/18] DAOS-8331 object: do not get time if metrics are not enabled. Do not need get time if metrics are not enabled. Required-githooks: true Signed-off-by: Di Wang --- src/object/cli_shard.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/object/cli_shard.c b/src/object/cli_shard.c index 969e1748bb1..1a80e1125e7 100644 --- a/src/object/cli_shard.c +++ b/src/object/cli_shard.c @@ -1228,7 +1228,7 @@ dc_obj_shard_rw(struct dc_obj_shard *shard, enum obj_rpc_opc opc, rw_args.shard_args = args; /* remember the sgl to copyout the data inline for fetch */ rw_args.rwaa_sgls = sgls; - rw_args.send_time = daos_get_ntime(); + rw_args.send_time = daos_client_metric ? daos_get_ntime() : 0; obj_shard_update_metrics_begin(req); if (args->reasb_req && args->reasb_req->orr_recov) { rw_args.maps = NULL; @@ -1367,7 +1367,7 @@ dc_obj_shard_punch(struct dc_obj_shard *shard, enum obj_rpc_opc opc, cb_args.rpc = req; cb_args.map_ver = &args->pa_auxi.map_ver; cb_args.shard_args = args; - cb_args.send_time = daos_get_ntime(); + cb_args.send_time = daos_client_metric ? 
daos_get_ntime() : 0; obj_shard_update_metrics_begin(req); rc = tse_task_register_comp_cb(task, obj_shard_punch_cb, &cb_args, sizeof(cb_args)); @@ -2054,7 +2054,7 @@ dc_obj_shard_list(struct dc_obj_shard *obj_shard, enum obj_rpc_opc opc, enum_args.th = &obj_args->th; enum_args.enqueue_id = &args->la_auxi.enqueue_id; enum_args.max_delay = &args->la_auxi.obj_auxi->max_delay; - enum_args.send_time = daos_get_ntime(); + enum_args.send_time = daos_client_metric ? daos_get_ntime() : 0; obj_shard_update_metrics_begin(req); rc = tse_task_register_comp_cb(task, dc_enumerate_cb, &enum_args, sizeof(enum_args)); @@ -2354,7 +2354,7 @@ dc_obj_shard_query_key(struct dc_obj_shard *shard, struct dtx_epoch *epoch, uint cb_args.max_epoch = max_epoch; cb_args.queue_id = queue_id; cb_args.max_delay = max_delay; - cb_args.send_time = daos_get_ntime(); + cb_args.send_time = daos_client_metric ? daos_get_ntime() : 0; obj_shard_update_metrics_begin(req); rc = tse_task_register_comp_cb(task, obj_shard_query_key_cb, &cb_args, sizeof(cb_args)); @@ -2506,7 +2506,7 @@ dc_obj_shard_sync(struct dc_obj_shard *shard, enum obj_rpc_opc opc, cb_args.map_ver = &args->sa_auxi.map_ver; cb_args.max_delay = &args->sa_auxi.obj_auxi->max_delay; cb_args.enqueue_id = &args->sa_auxi.enqueue_id; - cb_args.send_time = daos_get_ntime(); + cb_args.send_time = daos_client_metric ? daos_get_ntime() : 0; obj_shard_update_metrics_begin(req); rc = tse_task_register_comp_cb(task, obj_shard_sync_cb, &cb_args, sizeof(cb_args)); @@ -2691,7 +2691,7 @@ dc_obj_shard_key2anchor(struct dc_obj_shard *obj_shard, enum obj_rpc_opc opc, cb_args.shard = obj_shard->do_shard_idx; cb_args.enqueue_id = &args->ka_auxi.enqueue_id; cb_args.max_delay = &args->ka_auxi.obj_auxi->max_delay; - cb_args.send_time = daos_get_ntime(); + cb_args.send_time = daos_client_metric ? 
daos_get_ntime() : 0; obj_shard_update_metrics_begin(req); rc = tse_task_register_comp_cb(task, dc_k2a_cb, &cb_args, sizeof(cb_args)); if (rc != 0) From 5572d24a0056d6336a0e180208b9a7abd73f5bff Mon Sep 17 00:00:00 2001 From: Di Wang Date: Tue, 9 Jan 2024 18:28:19 +0000 Subject: [PATCH 10/18] DAOS-8331 client: Update patch to fix environment Update the patch by Mohamad comments Required-githooks: true Signed-off-by: Di Wang --- src/cart/README.env | 12 ++++++++++++ src/client/api/metrics.c | 11 ++++++----- src/include/daos/metrics.h | 7 ++++--- src/include/gurt/telemetry_common.h | 3 +-- src/object/cli_mod.c | 2 +- src/pool/cli.c | 2 +- src/utils/daos_metrics/daos_metrics.c | 3 ++- 7 files changed, 27 insertions(+), 13 deletions(-) diff --git a/src/cart/README.env b/src/cart/README.env index 3d4a12963df..8187e6aee2b 100644 --- a/src/cart/README.env +++ b/src/cart/README.env @@ -174,3 +174,15 @@ This file lists the environment variables used in CaRT. . CRT_TEST_CONT When set to 1, orterun does not automatically shut down other servers when one server is shutdown. Used in cart internal testing. + + . D_CLIENT_METRICS_ENABLE + When set to 1, client side metrics will be collected on each daos client, which + can be retrieved by daos_metrics -j job_id on each client. + + . D_CLIENT_METRICS_RETAIN + When set to 1, client side metrics will be retained even after the job exits, i.e. + those metrics can be retrieved by daos_metrics even after the job exits. + + . D_CLIENT_METRICS_DUMP_PATH + Set client side metrics dump path (file) for each client, so these metrics will be + dumped to the specified file when the job exits. 
diff --git a/src/client/api/metrics.c b/src/client/api/metrics.c index e48d9483df0..d6be903de35 100644 --- a/src/client/api/metrics.c +++ b/src/client/api/metrics.c @@ -31,7 +31,7 @@ dc_tm_init(void) pid_t pid; int rc; - d_getenv_bool(DAOS_CLIENT_METRICS_ENV, &daos_client_metric); + d_getenv_bool(DAOS_CLIENT_METRICS_ENABLE, &daos_client_metric); if (!daos_client_metric) return 0; @@ -40,7 +40,7 @@ dc_tm_init(void) D_GOTO(out, rc); metrics_tag = D_TM_OPEN_OR_CREATE; - d_getenv_bool(DAOS_CLIENT_METRICS_RETAIN_ENV, &daos_client_metric_retain); + d_getenv_bool(DAOS_CLIENT_METRICS_RETAIN, &daos_client_metric_retain); if (daos_client_metric_retain) metrics_tag |= D_TM_RETAIN_SHMEM; else @@ -133,10 +133,11 @@ dc_tm_fini() if (!daos_client_metric) return; - dump_path = getenv(METRIC_DUMP_ENV); - D_INFO("dump path is %s\n", dump_path); - if (dump_path != NULL) + dump_path = getenv(DAOS_CLIENT_METRICS_DUMP_PATH); + if (dump_path != NULL) { + D_INFO("dump path is %s\n", dump_path); dump_tm_file(dump_path); + } dc_tls_fini(); dc_tls_key_delete(); diff --git a/src/include/daos/metrics.h b/src/include/daos/metrics.h index 797ef89fa4d..e4bf5870d18 100644 --- a/src/include/daos/metrics.h +++ b/src/include/daos/metrics.h @@ -17,10 +17,11 @@ #include #include +#define DC_TM_JOB_ROOT_ID 256 /* For now TLS is only enabled if metrics are enabled */ -#define METRIC_DUMP_ENV "DAOS_METRIC_DUMP_ENV" -#define DAOS_CLIENT_METRICS_ENV "DAOS_CLIENT_METRICS" -#define DAOS_CLIENT_METRICS_RETAIN_ENV "DAOS_CLIENT_METRICS_RETAIN" +#define DAOS_CLIENT_METRICS_DUMP_PATH "D_CLIENT_METRIC_DUMP_PATH" +#define DAOS_CLIENT_METRICS_ENABLE "D_CLIENT_METRICS_ENABLE" +#define DAOS_CLIENT_METRICS_RETAIN "D_CLIENT_METRICS_RETAIN" extern bool daos_client_metric; extern bool daos_client_metric_retain; diff --git a/src/include/gurt/telemetry_common.h b/src/include/gurt/telemetry_common.h index a3ba8902010..bc287771964 100644 --- a/src/include/gurt/telemetry_common.h +++ b/src/include/gurt/telemetry_common.h @@ 
-1,5 +1,5 @@ /** - * (C) Copyright 2020-2023 Intel Corporation. + * (C) Copyright 2020-2024 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -178,7 +178,6 @@ enum { D_TM_ITER_RESET = 0x002, }; -#define DC_TM_JOB_ROOT_ID 256 /** * @brief Statistics for gauge and duration metrics * diff --git a/src/object/cli_mod.c b/src/object/cli_mod.c index 47b3cb68a95..a20645b685e 100644 --- a/src/object/cli_mod.c +++ b/src/object/cli_mod.c @@ -137,7 +137,7 @@ dc_obj_init(void) uint32_t ver_array[2] = {DAOS_OBJ_VERSION - 1, DAOS_OBJ_VERSION}; int rc; - d_getenv_bool(DAOS_CLIENT_METRICS_ENV, &daos_client_metric); + d_getenv_bool(DAOS_CLIENT_METRICS_ENABLE, &daos_client_metric); if (daos_client_metric) { daos_register_key(&dc_obj_module_key); rc = daos_register_metrics(DAOS_CLI_TAG, DAOS_OBJ_MODULE, diff --git a/src/pool/cli.c b/src/pool/cli.c index ac8798b5e44..ded4abdeab3 100644 --- a/src/pool/cli.c +++ b/src/pool/cli.c @@ -199,7 +199,7 @@ dc_pool_init(void) uint32_t ver_array[2] = {DAOS_POOL_VERSION - 1, DAOS_POOL_VERSION}; int rc; - d_getenv_bool(DAOS_CLIENT_METRICS_ENV, &daos_client_metric); + d_getenv_bool(DAOS_CLIENT_METRICS_ENABLE, &daos_client_metric); if (daos_client_metric) daos_register_key(&dc_pool_module_key); diff --git a/src/utils/daos_metrics/daos_metrics.c b/src/utils/daos_metrics/daos_metrics.c index cbdaeb2fc84..00376366f9c 100644 --- a/src/utils/daos_metrics/daos_metrics.c +++ b/src/utils/daos_metrics/daos_metrics.c @@ -1,5 +1,5 @@ /* - * (C) Copyright 2021-2023 Intel Corporation. + * (C) Copyright 2021-2024 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -10,6 +10,7 @@ #include #include +#include #include #include From c363d7b93a1b1902091fd8059d3fe73bc031eb51 Mon Sep 17 00:00:00 2001 From: Di Wang Date: Tue, 9 Jan 2024 18:53:21 +0000 Subject: [PATCH 11/18] DAOS-8331 client: fix environment VAR name fix environment VAR name Required-githooks: true Signed-off-by: Di Wang --- src/include/daos/metrics.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/include/daos/metrics.h b/src/include/daos/metrics.h index e4bf5870d18..bfad7648b0a 100644 --- a/src/include/daos/metrics.h +++ b/src/include/daos/metrics.h @@ -19,7 +19,7 @@ #define DC_TM_JOB_ROOT_ID 256 /* For now TLS is only enabled if metrics are enabled */ -#define DAOS_CLIENT_METRICS_DUMP_PATH "D_CLIENT_METRIC_DUMP_PATH" +#define DAOS_CLIENT_METRICS_DUMP_PATH "D_CLIENT_METRICS_DUMP_PATH" #define DAOS_CLIENT_METRICS_ENABLE "D_CLIENT_METRICS_ENABLE" #define DAOS_CLIENT_METRICS_RETAIN "D_CLIENT_METRICS_RETAIN" extern bool daos_client_metric; From fa5eea605cc4b0500b1d93a29dbbc978b6bf1b2a Mon Sep 17 00:00:00 2001 From: Di Wang Date: Mon, 5 Feb 2024 16:28:39 +0000 Subject: [PATCH 12/18] DAOS-8331 objects: fix building failure. Fix building failure. Required-githooks: true Signed-off-by: Di Wang --- src/object/cli_shard.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/object/cli_shard.c b/src/object/cli_shard.c index a35cc253a4e..7046ec87da8 100644 --- a/src/object/cli_shard.c +++ b/src/object/cli_shard.c @@ -2144,7 +2144,7 @@ obj_shard_query_key_cb(tse_task_t *task, void *data) D_SPIN_UNLOCK(&cb_args->obj->cob_spin); out: - obj_shard_update_metrics_end(rpc, cb_args->send_time, cb_args, ret == 0 ? 
rc : ret); + obj_shard_update_metrics_end(rpc, cb_args->send_time, cb_args, rc); crt_req_decref(rpc); return rc; } From 0dc06964e31a03439618d6419b33f2347eeec491 Mon Sep 17 00:00:00 2001 From: Di Wang Date: Tue, 6 Feb 2024 19:50:27 +0000 Subject: [PATCH 13/18] DAOS-8331 client: Fix segfault Fix segfault due to region list corruption. Update PR by Ashely review. Required-githooks: true Signed-off-by: Di Wang --- src/cart/crt_init.c | 7 ++++++- src/client/api/metrics.c | 2 +- src/gurt/telemetry.c | 6 +++++- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/cart/crt_init.c b/src/cart/crt_init.c index aa5716e71b5..873a67ba372 100644 --- a/src/cart/crt_init.c +++ b/src/cart/crt_init.c @@ -54,7 +54,12 @@ static const char *crt_env_names[] = {"D_PROVIDER", "D_QUOTA_RPCS", "D_POST_INIT", "D_POST_INCR", - "DAOS_SIGNAL_REGISTER"}; + "DAOS_SIGNAL_REGISTER", + "D_CLIENT_METRICS_ENABLE", + "D_CLIENT_METRICS_RETAIN", + "D_CLIENT_METRICS_DUMP_PATH", + +}; static void crt_lib_init(void) __attribute__((__constructor__)); diff --git a/src/client/api/metrics.c b/src/client/api/metrics.c index d6be903de35..c640a5b7a11 100644 --- a/src/client/api/metrics.c +++ b/src/client/api/metrics.c @@ -55,7 +55,7 @@ dc_tm_init(void) D_INFO("INIT %s metrics\n", dc_jobid); rc = d_tm_add_ephemeral_dir(NULL, MAX_IDS_SIZE(INIT_JOB_NUM), "%s", dc_jobid); if (rc != 0 && rc != -DER_EXIST) { - DL_ERROR(rc, "add metric %s failed.\n", dc_jobid); + DL_ERROR(rc, "add metric %s failed", dc_jobid); D_GOTO(out, rc); } diff --git a/src/gurt/telemetry.c b/src/gurt/telemetry.c index dcbe34e5d1d..bd39e86acd4 100644 --- a/src/gurt/telemetry.c +++ b/src/gurt/telemetry.c @@ -2455,7 +2455,11 @@ get_free_region_entry(struct d_tm_shmem_hdr *shmem, } next = conv_ptr(shmem, head->next); - cur = head->next; + if (d_list_empty(&shmem->sh_subregions)) + cur = (d_list_t *)(shmem->sh_base_addr + + (uint64_t)(&((struct d_tm_shmem_hdr *)(0))->sh_subregions)); + else + cur = head->next; head->next = &tmp->rl_link; 
next->prev = &tmp->rl_link; From 3a1c9306c3eeb5df2dccf9b7e77b65235bed281f Mon Sep 17 00:00:00 2001 From: Di Wang Date: Wed, 7 Feb 2024 23:24:29 +0000 Subject: [PATCH 14/18] DAOS-8331: gurt: add ephemeral lock for ephemeral directory Add ephemeral lock for ephemeral directory add and delete. Required-githooks: true Signed-off-by: Di Wang --- src/client/api/metrics.c | 2 +- src/gurt/telemetry.c | 36 +++++++++++++++++++++++++---- src/include/gurt/telemetry_common.h | 1 + 3 files changed, 34 insertions(+), 5 deletions(-) diff --git a/src/client/api/metrics.c b/src/client/api/metrics.c index c640a5b7a11..116e406062b 100644 --- a/src/client/api/metrics.c +++ b/src/client/api/metrics.c @@ -39,7 +39,7 @@ dc_tm_init(void) if (rc) D_GOTO(out, rc); - metrics_tag = D_TM_OPEN_OR_CREATE; + metrics_tag = D_TM_OPEN_OR_CREATE | D_TM_EPHEMERAL_DIR_LOCK; d_getenv_bool(DAOS_CLIENT_METRICS_RETAIN, &daos_client_metric_retain); if (daos_client_metric_retain) metrics_tag |= D_TM_RETAIN_SHMEM; diff --git a/src/gurt/telemetry.c b/src/gurt/telemetry.c index bd39e86acd4..a8aab710c42 100644 --- a/src/gurt/telemetry.c +++ b/src/gurt/telemetry.c @@ -31,12 +31,17 @@ struct shmem_region_list { struct d_tm_shmem_hdr { uint64_t sh_base_addr; /** address of this struct */ key_t sh_key; /** key to access region */ - bool sh_deleted; /** marked for deletion */ + uint32_t sh_deleted:1, /** marked for deletion */ + sh_require_ephemeral_lock:1; /** ephemeral_lock required */ uint8_t sh_reserved[3]; /** for alignment */ uint64_t sh_bytes_total; /** total size of region */ uint64_t sh_bytes_free; /** free bytes in this region */ void *sh_free_addr; /** start of free space */ struct d_tm_node_t *sh_root; /** root of metric tree */ + + /* lock to create and remove ephemeral_dir_lock */ + pthread_mutex_t sh_ephemeral_dir_lock; + /** * List of all ephemeral regions attached to this shmem region. 
*/ @@ -71,7 +76,8 @@ static struct d_tm_shmem { pthread_mutex_t add_lock; /** for synchronized access */ uint32_t retain:1, /* retain shmem region during exit */ sync_access:1, - retain_non_empty:1; /** retain shmem region if it is not empty */ + retain_non_empty:1, /** retain shmem region if it is not empty */ + ephemeral_dir_lock:1; /** lock for ephemeral directory */ int id; /** Instance ID */ } tm_shmem; @@ -333,7 +339,7 @@ close_local_shmem_entry(struct local_shmem_list *entry, bool destroy) { d_list_del(&entry->link); if (destroy) - entry->region->sh_deleted = true; + entry->region->sh_deleted = 1; close_shmem(entry->region); if (destroy) @@ -809,6 +815,11 @@ d_tm_init(int id, uint64_t mem_size, int flags) D_INFO("Retaining shared memory for id %d if not empty\n", id); } + if (flags & D_TM_EPHEMERAL_DIR_LOCK) { + tm_shmem.ephemeral_dir_lock = 1; + D_INFO("Retaining shared memory for id %d if not empty\n", id); + } + tm_shmem.id = id; snprintf(tmp, sizeof(tmp), "ID: %d", id); key = d_tm_get_srv_key(id); @@ -2560,10 +2571,13 @@ d_tm_add_ephemeral_dir(struct d_tm_node_t **node, size_t size_bytes, if (rc != 0) D_GOTO(fail, rc); + if (tm_shmem.ephemeral_dir_lock) + D_MUTEX_LOCK(&ctx->shmem_root->sh_ephemeral_dir_lock); + rc = d_tm_lock_shmem(); if (unlikely(rc != 0)) { D_ERROR("failed to get producer mutex\n"); - D_GOTO(fail, rc); + D_GOTO(fail_ephemeral_unlock, rc); } new_node = d_tm_find_metric(ctx, path); @@ -2612,6 +2626,8 @@ d_tm_add_ephemeral_dir(struct d_tm_node_t **node, size_t size_bytes, *node = new_node; d_tm_unlock_shmem(); + if (tm_shmem.ephemeral_dir_lock) + D_MUTEX_UNLOCK(&ctx->shmem_root->sh_ephemeral_dir_lock); return 0; fail_link: @@ -2624,6 +2640,9 @@ d_tm_add_ephemeral_dir(struct d_tm_node_t **node, size_t size_bytes, destroy_shmem(new_shmid); fail_unlock: d_tm_unlock_shmem(); +fail_ephemeral_unlock: + if (tm_shmem.ephemeral_dir_lock) + D_MUTEX_UNLOCK(&ctx->shmem_root->sh_ephemeral_dir_lock); fail: D_ERROR("Failed to add ephemeral dir [%s]: " 
DF_RC "\n", path, DP_RC(rc)); @@ -3670,6 +3689,7 @@ allocate_shared_memory(key_t key, size_t mem_size, { int shmid; struct d_tm_shmem_hdr *header; + int rc; D_ASSERT(shmem != NULL); @@ -3691,6 +3711,14 @@ allocate_shared_memory(key_t key, size_t mem_size, D_INIT_LIST_HEAD(&header->sh_subregions); + if (tm_shmem.ephemeral_dir_lock) { + rc = D_MUTEX_INIT(&header->sh_ephemeral_dir_lock, NULL); + if (rc) { + DL_ERROR(rc, "create ephemeral dir lock failed"); + return -DER_NO_SHMEM; + } + } + D_DEBUG(DB_MEM, "Created shared memory region for key 0x%x, size=%lu header %p base %p free %p\n", key, mem_size, header, (void *)header->sh_base_addr, (void *)header->sh_free_addr); diff --git a/src/include/gurt/telemetry_common.h b/src/include/gurt/telemetry_common.h index bc287771964..8723a3ff42e 100644 --- a/src/include/gurt/telemetry_common.h +++ b/src/include/gurt/telemetry_common.h @@ -157,6 +157,7 @@ enum { D_TM_RETAIN_SHMEM = 0x002, D_TM_RETAIN_SHMEM_IF_NON_EMPTY = 0x004, D_TM_OPEN_OR_CREATE = 0x008, + D_TM_EPHEMERAL_DIR_LOCK = 0x010, }; /** Output formats */ From e309fda620e9bde222f75f39b2d8dd721c4c14a2 Mon Sep 17 00:00:00 2001 From: Di Wang Date: Thu, 8 Feb 2024 00:07:07 +0000 Subject: [PATCH 15/18] DAOS-8331 gurt: fix input check fix input check Required-githooks: true Signed-off-by: Di Wang --- src/gurt/telemetry.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gurt/telemetry.c b/src/gurt/telemetry.c index a8aab710c42..e585bdaedf2 100644 --- a/src/gurt/telemetry.c +++ b/src/gurt/telemetry.c @@ -794,7 +794,8 @@ d_tm_init(int id, uint64_t mem_size, int flags) memset(&tm_shmem, 0, sizeof(tm_shmem)); if ((flags & ~(D_TM_SERIALIZATION | D_TM_RETAIN_SHMEM | - D_TM_RETAIN_SHMEM_IF_NON_EMPTY | D_TM_OPEN_OR_CREATE)) != 0) { + D_TM_RETAIN_SHMEM_IF_NON_EMPTY | D_TM_OPEN_OR_CREATE | + D_TM_EPHEMERAL_DIR_LOCK)) != 0) { D_ERROR("Invalid flags 0x%x\n", flags); rc = -DER_INVAL; goto failure; From d5a8748bd1647f243cbd3070c8d6192842abade7 Mon Sep 17 00:00:00 2001 
From: Di Wang Date: Thu, 8 Feb 2024 00:38:27 +0000 Subject: [PATCH 16/18] DAOS-8331 gurt: miss lock in ephemeral delete miss lock in ephemeral dir delete. Required-githooks: true Signed-off-by: Di Wang --- src/gurt/telemetry.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/gurt/telemetry.c b/src/gurt/telemetry.c index e585bdaedf2..e6f2e573b83 100644 --- a/src/gurt/telemetry.c +++ b/src/gurt/telemetry.c @@ -2740,10 +2740,13 @@ try_del_ephemeral_dir(char *path, bool force) struct d_tm_node_t *link; int rc = 0; + if (tm_shmem.ephemeral_dir_lock) + D_MUTEX_LOCK(&ctx->shmem_root->sh_ephemeral_dir_lock); + rc = d_tm_lock_shmem(); if (unlikely(rc != 0)) { D_ERROR("failed to get producer mutex\n"); - return rc; + D_GOTO(ephemeral_unlock, rc); } link = get_node(ctx, path); @@ -2754,6 +2757,11 @@ try_del_ephemeral_dir(char *path, bool force) unlock: d_tm_unlock_shmem(); + +ephemeral_unlock: + if (tm_shmem.ephemeral_dir_lock) + D_MUTEX_LOCK(&ctx->shmem_root->sh_ephemeral_dir_lock); + return rc; } /** From c87d38df03a4ecf1fc2a51cb8731f7a73a1992ff Mon Sep 17 00:00:00 2001 From: Di Wang Date: Thu, 8 Feb 2024 00:53:12 +0000 Subject: [PATCH 17/18] DAOS-8331 gurt: fix typo fix typo Required-githooks: true Signed-off-by: Di Wang --- src/gurt/telemetry.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gurt/telemetry.c b/src/gurt/telemetry.c index e6f2e573b83..f83ee00002b 100644 --- a/src/gurt/telemetry.c +++ b/src/gurt/telemetry.c @@ -2760,7 +2760,7 @@ try_del_ephemeral_dir(char *path, bool force) ephemeral_unlock: if (tm_shmem.ephemeral_dir_lock) - D_MUTEX_LOCK(&ctx->shmem_root->sh_ephemeral_dir_lock); + D_MUTEX_UNLOCK(&ctx->shmem_root->sh_ephemeral_dir_lock); return rc; } From 11c411f3bb4462e3dc48eefd399d6d71977d9dae Mon Sep 17 00:00:00 2001 From: Di Wang Date: Tue, 20 Feb 2024 18:15:26 +0000 Subject: [PATCH 18/18] DAOS-8331 client: resolve Kris comments Resolve comments from Kris. 
Required-githooks: true Signed-off-by: Di Wang --- src/client/api/metrics.c | 2 +- src/gurt/telemetry.c | 43 +++++++++++++++-------------- src/include/gurt/telemetry_common.h | 2 +- 3 files changed, 24 insertions(+), 23 deletions(-) diff --git a/src/client/api/metrics.c b/src/client/api/metrics.c index 116e406062b..a4a78a6e25c 100644 --- a/src/client/api/metrics.c +++ b/src/client/api/metrics.c @@ -39,7 +39,7 @@ dc_tm_init(void) if (rc) D_GOTO(out, rc); - metrics_tag = D_TM_OPEN_OR_CREATE | D_TM_EPHEMERAL_DIR_LOCK; + metrics_tag = D_TM_OPEN_OR_CREATE | D_TM_MULTIPLE_WRITER_LOCK; d_getenv_bool(DAOS_CLIENT_METRICS_RETAIN, &daos_client_metric_retain); if (daos_client_metric_retain) metrics_tag |= D_TM_RETAIN_SHMEM; diff --git a/src/gurt/telemetry.c b/src/gurt/telemetry.c index f83ee00002b..7e776404f4c 100644 --- a/src/gurt/telemetry.c +++ b/src/gurt/telemetry.c @@ -32,15 +32,15 @@ struct d_tm_shmem_hdr { uint64_t sh_base_addr; /** address of this struct */ key_t sh_key; /** key to access region */ uint32_t sh_deleted:1, /** marked for deletion */ - sh_require_ephemeral_lock:1; /** ephemeral_lock required */ + sh_multiple_writer:1; /** require lock to protect */ uint8_t sh_reserved[3]; /** for alignment */ uint64_t sh_bytes_total; /** total size of region */ uint64_t sh_bytes_free; /** free bytes in this region */ void *sh_free_addr; /** start of free space */ struct d_tm_node_t *sh_root; /** root of metric tree */ - /* lock to create and remove ephemeral_dir_lock */ - pthread_mutex_t sh_ephemeral_dir_lock; + /* lock to protect update, mostly for create and remove ephemeral dir */ + pthread_mutex_t sh_multiple_writer_lock; /** * List of all ephemeral regions attached to this shmem region. 
@@ -77,7 +77,7 @@ static struct d_tm_shmem { uint32_t retain:1, /* retain shmem region during exit */ sync_access:1, retain_non_empty:1, /** retain shmem region if it is not empty */ - ephemeral_dir_lock:1; /** lock for ephemeral directory */ + multiple_writer_lock:1; /** lock for multiple writer */ int id; /** Instance ID */ } tm_shmem; @@ -795,7 +795,7 @@ d_tm_init(int id, uint64_t mem_size, int flags) if ((flags & ~(D_TM_SERIALIZATION | D_TM_RETAIN_SHMEM | D_TM_RETAIN_SHMEM_IF_NON_EMPTY | D_TM_OPEN_OR_CREATE | - D_TM_EPHEMERAL_DIR_LOCK)) != 0) { + D_TM_MULTIPLE_WRITER_LOCK)) != 0) { D_ERROR("Invalid flags 0x%x\n", flags); rc = -DER_INVAL; goto failure; @@ -816,9 +816,9 @@ d_tm_init(int id, uint64_t mem_size, int flags) D_INFO("Retaining shared memory for id %d if not empty\n", id); } - if (flags & D_TM_EPHEMERAL_DIR_LOCK) { - tm_shmem.ephemeral_dir_lock = 1; - D_INFO("Retaining shared memory for id %d if not empty\n", id); + if (flags & D_TM_MULTIPLE_WRITER_LOCK) { + tm_shmem.multiple_writer_lock = 1; + D_INFO("Require multiple write protection for id %d\n", id); } tm_shmem.id = id; @@ -2467,6 +2467,7 @@ get_free_region_entry(struct d_tm_shmem_hdr *shmem, } next = conv_ptr(shmem, head->next); + /* NB: sh_subregions is initialized by D_INIT_LIST_HEAD(), so it is not shmem address */ if (d_list_empty(&shmem->sh_subregions)) cur = (d_list_t *)(shmem->sh_base_addr + (uint64_t)(&((struct d_tm_shmem_hdr *)(0))->sh_subregions)); @@ -2572,8 +2573,8 @@ d_tm_add_ephemeral_dir(struct d_tm_node_t **node, size_t size_bytes, if (rc != 0) D_GOTO(fail, rc); - if (tm_shmem.ephemeral_dir_lock) - D_MUTEX_LOCK(&ctx->shmem_root->sh_ephemeral_dir_lock); + if (tm_shmem.multiple_writer_lock) + D_MUTEX_LOCK(&ctx->shmem_root->sh_multiple_writer_lock); rc = d_tm_lock_shmem(); if (unlikely(rc != 0)) { @@ -2627,8 +2628,8 @@ d_tm_add_ephemeral_dir(struct d_tm_node_t **node, size_t size_bytes, *node = new_node; d_tm_unlock_shmem(); - if (tm_shmem.ephemeral_dir_lock) - 
D_MUTEX_UNLOCK(&ctx->shmem_root->sh_ephemeral_dir_lock); + if (tm_shmem.multiple_writer_lock) + D_MUTEX_UNLOCK(&ctx->shmem_root->sh_multiple_writer_lock); return 0; fail_link: @@ -2642,8 +2643,8 @@ d_tm_add_ephemeral_dir(struct d_tm_node_t **node, size_t size_bytes, fail_unlock: d_tm_unlock_shmem(); fail_ephemeral_unlock: - if (tm_shmem.ephemeral_dir_lock) - D_MUTEX_UNLOCK(&ctx->shmem_root->sh_ephemeral_dir_lock); + if (tm_shmem.multiple_writer_lock) + D_MUTEX_UNLOCK(&ctx->shmem_root->sh_multiple_writer_lock); fail: D_ERROR("Failed to add ephemeral dir [%s]: " DF_RC "\n", path, DP_RC(rc)); @@ -2740,8 +2741,8 @@ try_del_ephemeral_dir(char *path, bool force) struct d_tm_node_t *link; int rc = 0; - if (tm_shmem.ephemeral_dir_lock) - D_MUTEX_LOCK(&ctx->shmem_root->sh_ephemeral_dir_lock); + if (tm_shmem.multiple_writer_lock) + D_MUTEX_LOCK(&ctx->shmem_root->sh_multiple_writer_lock); rc = d_tm_lock_shmem(); if (unlikely(rc != 0)) { @@ -2759,8 +2760,8 @@ try_del_ephemeral_dir(char *path, bool force) d_tm_unlock_shmem(); ephemeral_unlock: - if (tm_shmem.ephemeral_dir_lock) - D_MUTEX_UNLOCK(&ctx->shmem_root->sh_ephemeral_dir_lock); + if (tm_shmem.multiple_writer_lock) + D_MUTEX_UNLOCK(&ctx->shmem_root->sh_multiple_writer_lock); return rc; } @@ -3720,10 +3721,10 @@ allocate_shared_memory(key_t key, size_t mem_size, D_INIT_LIST_HEAD(&header->sh_subregions); - if (tm_shmem.ephemeral_dir_lock) { - rc = D_MUTEX_INIT(&header->sh_ephemeral_dir_lock, NULL); + if (tm_shmem.multiple_writer_lock) { + rc = D_MUTEX_INIT(&header->sh_multiple_writer_lock, NULL); if (rc) { - DL_ERROR(rc, "create ephemeral dir lock failed"); + DL_ERROR(rc, "multiple writer lock failed"); return -DER_NO_SHMEM; } } diff --git a/src/include/gurt/telemetry_common.h b/src/include/gurt/telemetry_common.h index 8723a3ff42e..ce592326e34 100644 --- a/src/include/gurt/telemetry_common.h +++ b/src/include/gurt/telemetry_common.h @@ -157,7 +157,7 @@ enum { D_TM_RETAIN_SHMEM = 0x002, D_TM_RETAIN_SHMEM_IF_NON_EMPTY = 
0x004, D_TM_OPEN_OR_CREATE = 0x008, - D_TM_EPHEMERAL_DIR_LOCK = 0x010, + D_TM_MULTIPLE_WRITER_LOCK = 0x010, }; /** Output formats */