diff --git a/src/diskquota.c b/src/diskquota.c index f714a4f0..cc25b70a 100644 --- a/src/diskquota.c +++ b/src/diskquota.c @@ -70,14 +70,16 @@ static volatile sig_atomic_t got_sigusr1 = false; static volatile sig_atomic_t got_sigusr2 = false; /* GUC variables */ -int diskquota_naptime = 0; -int diskquota_max_active_tables = 0; -int diskquota_worker_timeout = 60; /* default timeout is 60 seconds */ -bool diskquota_hardlimit = false; -int diskquota_max_workers = 10; -int diskquota_max_table_segments = 0; -int diskquota_max_monitored_databases = 0; -int diskquota_max_quota_probes = 0; +int diskquota_naptime = 0; +int diskquota_max_active_tables = 0; +int diskquota_worker_timeout = 60; /* default timeout is 60 seconds */ +bool diskquota_hardlimit = false; +int diskquota_max_workers = 10; +int diskquota_max_table_segments = 0; +int diskquota_max_monitored_databases = 0; +int diskquota_max_quota_probes = 0; +int diskquota_max_local_reject_entries = 0; +int diskquota_hashmap_overflow_report_timeout = 0; DiskQuotaLocks diskquota_locks; ExtensionDDLMessage *extension_ddl_message = NULL; @@ -89,12 +91,6 @@ static DiskQuotaWorkerEntry *volatile MyWorkerInfo = NULL; // how many database diskquota are monitoring on static int num_db = 0; -/* how many TableSizeEntry are maintained in all the table_size_map in shared memory*/ -pg_atomic_uint32 *diskquota_table_size_entry_num; - -/* how many QuotaInfoEntry are maintained in all the quota_info_map in shared memory*/ -pg_atomic_uint32 *diskquota_quota_info_entry_num; - static DiskquotaLauncherShmemStruct *DiskquotaLauncherShmem; #define MIN_SLEEPTIME 100 /* milliseconds */ @@ -414,6 +410,12 @@ define_guc_variables(void) DefineCustomIntVariable("diskquota.max_quota_probes", "Max number of quotas on the cluster.", NULL, &diskquota_max_quota_probes, 1024 * 1024, 1024 * INIT_QUOTA_MAP_ENTRIES, INT_MAX, PGC_POSTMASTER, 0, NULL, NULL, NULL); + DefineCustomIntVariable("diskquota.max_reject_entries", "Max number of reject entries per 
database.", NULL, + &diskquota_max_local_reject_entries, 8192, 1, INT_MAX, PGC_POSTMASTER, 0, NULL, NULL, NULL); + DefineCustomIntVariable("diskquota.hashmap_overflow_report_timeout", + "The duration between each warning report about the shared hashmap overflow (in seconds).", + NULL, &diskquota_hashmap_overflow_report_timeout, 60, 0, INT_MAX / 1000, PGC_SUSET, 0, NULL, + NULL, NULL); } /* ---- Functions for disk quota worker process ---- */ @@ -1802,15 +1804,6 @@ init_launcher_shmem() DiskquotaLauncherShmem->dbArray[i].workerId = INVALID_WORKER_ID; } } - /* init TableSizeEntry counter */ - diskquota_table_size_entry_num = - ShmemInitStruct("diskquota TableSizeEntry counter", sizeof(pg_atomic_uint32), &found); - if (!found) pg_atomic_init_u32(diskquota_table_size_entry_num, 0); - - /* init QuotaInfoEntry counter */ - diskquota_quota_info_entry_num = - ShmemInitStruct("diskquota QuotaInfoEntry counter", sizeof(pg_atomic_uint32), &found); - if (!found) pg_atomic_init_u32(diskquota_quota_info_entry_num, 0); } /* diff --git a/src/diskquota.h b/src/diskquota.h index b3d3481c..7c2bbb15 100644 --- a/src/diskquota.h +++ b/src/diskquota.h @@ -286,6 +286,7 @@ extern Datum diskquota_fetch_table_stat(PG_FUNCTION_ARGS); extern int diskquota_naptime; extern int diskquota_max_active_tables; extern bool diskquota_hardlimit; +extern int diskquota_hashmap_overflow_report_timeout; extern int SEGCOUNT; extern int worker_spi_get_extension_version(int *major, int *minor); @@ -316,4 +317,6 @@ extern HTAB *diskquota_hash_create(const char *tabname, long nelem, HASHC extern HTAB *DiskquotaShmemInitHash(const char *name, long init_size, long max_size, HASHCTL *infoP, int hash_flags, DiskquotaHashFunction hash_function); extern void refresh_monitored_dbid_cache(void); +extern HASHACTION check_hash_fullness(HTAB *hashp, int max_size, const char *warning_message, + TimestampTz *last_overflow_report); #endif diff --git a/src/diskquota_utility.c b/src/diskquota_utility.c index 28b874e8..f306cef8 
100644
--- a/src/diskquota_utility.c
+++ b/src/diskquota_utility.c
@@ -1683,3 +1683,46 @@ DiskquotaShmemInitHash(const char *name, /* table string name fo
 	return ShmemInitHash(name, init_size, max_size, infoP, hash_flags | HASH_BLOBS);
 #endif /* GP_VERSION_NUM */
 }
+
+/*
+ * Decide whether a size-limited hash table may still accept a new entry.
+ *
+ * hashp: hash table to check.
+ * max_size: maximum number of entries the table is allowed to hold.
+ * warning_message: text of the WARNING (it is prefixed with "[diskquota] ").
+ * last_overflow_report: time of the previous WARNING for this table, 0 if
+ * none was reported yet; updated each time a WARNING is emitted.
+ *
+ * Returns HASH_ENTER while the table holds fewer than max_size entries and
+ * HASH_FIND otherwise. With HASH_FIND, hash_search() returns NULL for a key
+ * that is not in the table yet, so callers must check the result for NULL.
+ *
+ * The WARNING is repeated no more often than once per
+ * diskquota_hashmap_overflow_report_timeout seconds.
+ *
+ * It can be used only under lock.
+ */
+HASHACTION
+check_hash_fullness(HTAB *hashp, int max_size, const char *warning_message, TimestampTz *last_overflow_report)
+{
+	long        num_entries = hash_get_num_entries(hashp);
+	TimestampTz current_time;
+
+	if (num_entries < max_size) return HASH_ENTER;
+
+	/*
+	 * Report for any num_entries >= max_size, not only for an exact match:
+	 * if the table ever holds more entries than max_size (or max_size is not
+	 * positive), inserts are refused and that must not happen silently.
+	 */
+	current_time = GetCurrentTimestamp();
+	if (*last_overflow_report == 0 ||
+	    TimestampDifferenceExceeds(*last_overflow_report, current_time,
+	                               diskquota_hashmap_overflow_report_timeout * 1000))
+	{
+		ereport(WARNING, (errmsg("[diskquota] %s", warning_message)));
+		*last_overflow_report = current_time;
+	}
+
+	return HASH_FIND;
+}
diff --git a/src/gp_activetable.c b/src/gp_activetable.c index 6e76633b..adbd69ef 100644 --- a/src/gp_activetable.c +++ b/src/gp_activetable.c @@ -51,7 +51,12 @@ typedef struct DiskQuotaSetOFCache HASH_SEQ_STATUS pos; } DiskQuotaSetOFCache; -HTAB *active_tables_map = NULL; // Set +static HTAB *active_tables_map = NULL; // Set +TimestampTz active_tables_map_last_overflow_report = 0; + +static const char *active_tables_map_warning = + "the number of active tables reached the limit, please increase " + "the GUC value for diskquota.max_active_tables."; /* * monitored_dbid_cache is a allow list for diskquota @@ -60,7 +65,12 @@ HTAB *active_tables_map = NULL; // Set * dbid will be added to it when creating diskquota extension * dbid will be removed from it when droping diskquota extension */ -HTAB *altered_reloid_cache = NULL; // Set +static HTAB *altered_reloid_cache = NULL; // Set +static TimestampTz altered_reloid_cache_last_overflow_report = 0; + +static const char 
*altered_reloid_cache_warning = + "the number of altered reloid cache entries reached the limit, please increase " + "the GUC value for diskquota.max_active_tables."; /* active table hooks which detect the disk file size change. */ static file_create_hook_type prev_file_create_hook = NULL; @@ -236,7 +246,9 @@ report_altered_reloid(Oid reloid) if (IsRoleMirror() || IS_QUERY_DISPATCHER()) return; LWLockAcquire(diskquota_locks.altered_reloid_cache_lock, LW_EXCLUSIVE); - hash_search(altered_reloid_cache, &reloid, HASH_ENTER, NULL); + HASHACTION action = check_hash_fullness(altered_reloid_cache, diskquota_max_active_tables, + altered_reloid_cache_warning, &altered_reloid_cache_last_overflow_report); + hash_search(altered_reloid_cache, &reloid, action, NULL); LWLockRelease(diskquota_locks.altered_reloid_cache_lock); } @@ -318,17 +330,11 @@ report_active_table_helper(const RelFileNodeBackend *relFileNode) item.tablespaceoid = relFileNode->node.spcNode; LWLockAcquire(diskquota_locks.active_table_lock, LW_EXCLUSIVE); - entry = hash_search(active_tables_map, &item, HASH_ENTER_NULL, &found); + HASHACTION action = check_hash_fullness(active_tables_map, diskquota_max_active_tables, active_tables_map_warning, + &active_tables_map_last_overflow_report); + entry = hash_search(active_tables_map, &item, action, &found); if (entry && !found) *entry = item; - if (!found && entry == NULL) - { - /* - * We may miss the file size change of this relation at current - * refresh interval. - */ - ereport(WARNING, (errmsg("Share memory is not enough for active tables."))); - } LWLockRelease(diskquota_locks.active_table_lock); } @@ -856,8 +862,9 @@ get_active_tables_oid(void) hash_seq_init(&iter, local_active_table_file_map); while ((active_table_file_entry = (DiskQuotaActiveTableFileEntry *)hash_seq_search(&iter)) != NULL) { - /* TODO: handle possible ERROR here so that the bgworker will not go down. 
*/ - hash_search(active_tables_map, active_table_file_entry, HASH_ENTER, NULL); + HASHACTION action = check_hash_fullness(active_tables_map, diskquota_max_active_tables, + active_tables_map_warning, &active_tables_map_last_overflow_report); + hash_search(active_tables_map, active_table_file_entry, action, NULL); } /* TODO: hash_seq_term(&iter); */ LWLockRelease(diskquota_locks.active_table_lock); @@ -919,7 +926,9 @@ get_active_tables_oid(void) LWLockAcquire(diskquota_locks.active_table_lock, LW_EXCLUSIVE); while ((active_table_file_entry = (DiskQuotaActiveTableFileEntry *)hash_seq_search(&iter)) != NULL) { - entry = hash_search(active_tables_map, active_table_file_entry, HASH_ENTER_NULL, &found); + HASHACTION action = check_hash_fullness(active_tables_map, diskquota_max_active_tables, + active_tables_map_warning, &active_tables_map_last_overflow_report); + entry = hash_search(active_tables_map, active_table_file_entry, action, &found); if (entry) *entry = *active_table_file_entry; } LWLockRelease(diskquota_locks.active_table_lock); diff --git a/src/gp_activetable.h b/src/gp_activetable.h index 6b513fe9..1ff10d60 100644 --- a/src/gp_activetable.h +++ b/src/gp_activetable.h @@ -42,9 +42,7 @@ extern void init_active_table_hook(void); extern void init_shm_worker_active_tables(void); extern void init_lock_active_tables(void); -extern HTAB *active_tables_map; extern HTAB *monitored_dbid_cache; -extern HTAB *altered_reloid_cache; #ifndef atooid #define atooid(x) ((Oid)strtoul((x), NULL, 10)) diff --git a/src/quotamodel.c b/src/quotamodel.c index 58752c05..f558bc66 100644 --- a/src/quotamodel.c +++ b/src/quotamodel.c @@ -45,11 +45,7 @@ #include /* cluster level max size of rejectmap */ -#define MAX_DISK_QUOTA_REJECT_ENTRIES (1024 * 1024) -/* cluster level init size of rejectmap */ -#define INIT_DISK_QUOTA_REJECT_ENTRIES 8192 -/* per database level max size of rejectmap */ -#define MAX_LOCAL_DISK_QUOTA_REJECT_ENTRIES 8192 +#define MAX_DISK_QUOTA_REJECT_ENTRIES 
(diskquota_max_local_reject_entries * diskquota_max_monitored_databases) /* Number of attributes in quota configuration records. */ #define NUM_QUOTA_CONFIG_ATTRS 6 /* Number of entries for diskquota.table_size update SQL */ @@ -83,13 +79,11 @@ typedef struct RejectMapEntry RejectMapEntry; typedef struct GlobalRejectMapEntry GlobalRejectMapEntry; typedef struct LocalRejectMapEntry LocalRejectMapEntry; -int SEGCOUNT = 0; -extern int diskquota_max_table_segments; -extern pg_atomic_uint32 *diskquota_table_size_entry_num; -extern int diskquota_max_monitored_databases; -extern int diskquota_max_quota_probes; -extern pg_atomic_uint32 *diskquota_quota_info_entry_num; - +int SEGCOUNT = 0; +extern int diskquota_max_table_segments; +extern int diskquota_max_monitored_databases; +extern int diskquota_max_quota_probes; +extern int diskquota_max_local_reject_entries; /* * local cache of table disk size and corresponding schema and owner. * @@ -144,7 +138,13 @@ typedef enum uint16 quota_key_num[NUM_QUOTA_TYPES] = {1, 1, 2, 2, 1}; Oid quota_key_caches[NUM_QUOTA_TYPES][MAX_NUM_KEYS_QUOTA_MAP] = { {NAMESPACEOID}, {AUTHOID}, {NAMESPACEOID, TABLESPACEOID}, {AUTHOID, TABLESPACEOID}, {TABLESPACEOID}}; -HTAB *quota_info_map; +static HTAB *quota_info_map; +/* stored in shared memory */ +static TimestampTz *quota_info_map_last_overflow_report = NULL; + +static const char *quota_info_map_warning = + "the number of quota probe reached the limit, please " + "increase the GUC value for diskquota.max_quota_probes."; /* global rejectmap for which exceed their quota limit */ struct RejectMapEntry @@ -185,11 +185,29 @@ struct LocalRejectMapEntry /* using hash table to support incremental update the table size entry.*/ static HTAB *table_size_map = NULL; +/* stored in shared memory */ +static TimestampTz *table_size_map_last_overflow_report = NULL; + +static const char *table_size_map_warning = + "the number of tables reached the limit, please increase " + "the GUC value for 
diskquota.max_table_segments."; /* rejectmap for database objects which exceed their quota limit */ static HTAB *disk_quota_reject_map = NULL; static HTAB *local_disk_quota_reject_map = NULL; +static TimestampTz disk_quota_reject_map_last_overflow_report = 0; +/* stored in shared memory */ +static TimestampTz *local_disk_quota_reject_map_last_overflow_report = NULL; + +static const char *disk_quota_reject_map_warning = + "the number of quota reject map entries reached the limit, " + "please increase the GUC value for diskquota.max_reject_entries."; + +static const char *local_disk_quota_reject_map_warning = + "the number of local quota reject map entries reached the limit, " + "please increase the GUC value for diskquota.max_reject_entries."; + static shmem_startup_hook_type prev_shmem_startup_hook = NULL; /* functions to maintain the quota maps */ @@ -199,7 +217,6 @@ static void add_quota_to_rejectmap(QuotaType type, Oid targetOid, Oid tablespace static void refresh_quota_info_map(void); static void clean_all_quota_limit(void); static void transfer_table_for_quota(int64 totalsize, QuotaType type, Oid *old_keys, Oid *new_keys, int16 segid); -static QuotaInfoEntry *put_quota_map_entry(QuotaInfoEntryKey *key, bool *found); /* functions to refresh disk quota model*/ static void refresh_disk_quota_usage(bool is_init); @@ -224,44 +241,6 @@ static void set_table_size_entry_flag(TableSizeEntry *entry, TableSizeEntryFlag static void delete_from_table_size_map(char *str); -/* - * put QuotaInfoEntry into quota_info_map and return this entry. 
- * return NULL: no free SHM for quota_info_map - * found cannot be NULL - */ -static QuotaInfoEntry * -put_quota_map_entry(QuotaInfoEntryKey *key, bool *found) -{ - QuotaInfoEntry *entry; - uint32 counter = pg_atomic_read_u32(diskquota_quota_info_entry_num); - if (counter >= diskquota_max_quota_probes) - { - entry = hash_search(quota_info_map, key, HASH_FIND, found); - /* - * Too many quotas have been added to the quota_info_map, to avoid diskquota using - * too much shared memory, just return NULL. The diskquota won't work correctly - * anymore. - */ - if (!(*found)) return NULL; - } - else - { - entry = hash_search(quota_info_map, key, HASH_ENTER, found); - if (!(*found)) - { - counter = pg_atomic_add_fetch_u32(diskquota_quota_info_entry_num, 1); - if (counter >= diskquota_max_quota_probes) - { - ereport(WARNING, (errmsg("[diskquota] the number of quota probe exceeds the limit, please " - "increase the GUC value for diskquota.max_quota_probes. Current " - "diskquota.max_quota_probes value: %d", - diskquota_max_quota_probes))); - } - } - } - return entry; -} - /* add a new entry quota or update the old entry quota */ static void update_size_for_quota(int64 size, QuotaType type, Oid *keys, int16 segid) @@ -269,11 +248,14 @@ update_size_for_quota(int64 size, QuotaType type, Oid *keys, int16 segid) bool found; QuotaInfoEntry *entry; QuotaInfoEntryKey key = {0}; + HASHACTION action; memcpy(key.keys, keys, quota_key_num[type] * sizeof(Oid)); key.type = type; key.segid = segid; - entry = put_quota_map_entry(&key, &found); + action = check_hash_fullness(quota_info_map, diskquota_max_quota_probes, quota_info_map_warning, + quota_info_map_last_overflow_report); + entry = hash_search(quota_info_map, &key, action, &found); /* If the number of quota exceeds the limit, entry will be NULL */ if (entry == NULL) return; if (!found) @@ -293,11 +275,14 @@ update_limit_for_quota(int64 limit, float segratio, QuotaType type, Oid *keys) { QuotaInfoEntry *entry; QuotaInfoEntryKey key = 
{0}; + HASHACTION action; memcpy(key.keys, keys, quota_key_num[type] * sizeof(Oid)); key.type = type; key.segid = i; - entry = put_quota_map_entry(&key, &found); + action = check_hash_fullness(quota_info_map, diskquota_max_quota_probes, quota_info_map_warning, + quota_info_map_last_overflow_report); + entry = hash_search(quota_info_map, &key, action, &found); /* If the number of quota exceeds the limit, entry will be NULL */ if (entry == NULL) continue; if (!found) @@ -325,10 +310,16 @@ add_quota_to_rejectmap(QuotaType type, Oid targetOid, Oid tablespaceoid, bool se keyitem.databaseoid = MyDatabaseId; keyitem.tablespaceoid = tablespaceoid; keyitem.targettype = (uint32)type; - ereport(DEBUG1, (errmsg("[diskquota] Put object %u to rejectmap", targetOid))); - localrejectentry = (LocalRejectMapEntry *)hash_search(local_disk_quota_reject_map, &keyitem, HASH_ENTER, NULL); - localrejectentry->isexceeded = true; - localrejectentry->segexceeded = segexceeded; + HASHACTION action = + check_hash_fullness(local_disk_quota_reject_map, diskquota_max_local_reject_entries, + local_disk_quota_reject_map_warning, local_disk_quota_reject_map_last_overflow_report); + localrejectentry = hash_search(local_disk_quota_reject_map, &keyitem, action, NULL); + if (localrejectentry) + { + ereport(DEBUG1, (errmsg("[diskquota] Put object %u to rejectmap", targetOid))); + localrejectentry->isexceeded = true; + localrejectentry->segexceeded = segexceeded; + } } /* @@ -354,7 +345,6 @@ refresh_quota_info_map(void) if (!HeapTupleIsValid(tuple)) { hash_search(quota_info_map, &entry->key, HASH_REMOVE, NULL); - pg_atomic_fetch_sub_u32(diskquota_quota_info_entry_num, 1); removed = true; break; } @@ -455,7 +445,7 @@ disk_quota_shmem_startup(void) hash_ctl.keysize = sizeof(RejectMapEntry); hash_ctl.entrysize = sizeof(GlobalRejectMapEntry); disk_quota_reject_map = - DiskquotaShmemInitHash("rejectmap whose quota limitation is reached", INIT_DISK_QUOTA_REJECT_ENTRIES, + DiskquotaShmemInitHash("rejectmap whose 
quota limitation is reached", diskquota_max_local_reject_entries, MAX_DISK_QUOTA_REJECT_ENTRIES, &hash_ctl, HASH_ELEM, DISKQUOTA_TAG_HASH); init_shm_worker_active_tables(); @@ -516,7 +506,11 @@ diskquota_worker_shmem_size() Size size; size = hash_estimate_size(MAX_NUM_TABLE_SIZE_ENTRIES / diskquota_max_monitored_databases + 100, sizeof(TableSizeEntry)); - size = add_size(size, hash_estimate_size(MAX_LOCAL_DISK_QUOTA_REJECT_ENTRIES, sizeof(LocalRejectMapEntry))); + size = add_size(size, hash_estimate_size(diskquota_max_local_reject_entries, sizeof(LocalRejectMapEntry))); + size = add_size(size, hash_estimate_size(MAX_QUOTA_MAP_ENTRIES, sizeof(QuotaInfoEntry))); + size = add_size(size, sizeof(TimestampTz)); // table_size_map_last_overflow_report + size = add_size(size, sizeof(TimestampTz)); // local_disk_quota_reject_map_last_overflow_report + size = add_size(size, sizeof(TimestampTz)); // quota_info_map_last_overflow_report return size; } @@ -540,10 +534,7 @@ DiskQuotaShmemSize(void) if (IS_QUERY_DISPATCHER()) { size = add_size(size, diskquota_launcher_shmem_size()); - size = add_size(size, sizeof(pg_atomic_uint32)); size = add_size(size, diskquota_worker_shmem_size() * diskquota_max_monitored_databases); - size = add_size(size, hash_estimate_size(MAX_QUOTA_MAP_ENTRIES, sizeof(QuotaInfoEntry)) * - diskquota_max_monitored_databases); } return size; @@ -558,6 +549,7 @@ init_disk_quota_model(uint32 id) { HASHCTL hash_ctl; StringInfoData str; + bool found; initStringInfo(&str); format_name("TableSizeEntrymap", id, &str); @@ -566,6 +558,9 @@ init_disk_quota_model(uint32 id) hash_ctl.entrysize = sizeof(TableSizeEntry); table_size_map = DiskquotaShmemInitHash(str.data, INIT_NUM_TABLE_SIZE_ENTRIES, MAX_NUM_TABLE_SIZE_ENTRIES, &hash_ctl, HASH_ELEM, DISKQUOTA_TAG_HASH); + format_name("TableSizeEntrymap_last_overflow_report", id, &str); + table_size_map_last_overflow_report = ShmemInitStruct(str.data, sizeof(TimestampTz), &found); + if (!found) 
*table_size_map_last_overflow_report = 0; /* for localrejectmap */ /* WARNNING: The max length of name of the map is 48 */ @@ -574,9 +569,13 @@ init_disk_quota_model(uint32 id) hash_ctl.keysize = sizeof(RejectMapEntry); hash_ctl.entrysize = sizeof(LocalRejectMapEntry); local_disk_quota_reject_map = - DiskquotaShmemInitHash(str.data, MAX_LOCAL_DISK_QUOTA_REJECT_ENTRIES, MAX_LOCAL_DISK_QUOTA_REJECT_ENTRIES, + DiskquotaShmemInitHash(str.data, diskquota_max_local_reject_entries, diskquota_max_local_reject_entries, &hash_ctl, HASH_ELEM, DISKQUOTA_TAG_HASH); + format_name("localrejectmap_last_overflow_report", id, &str); + local_disk_quota_reject_map_last_overflow_report = ShmemInitStruct(str.data, sizeof(TimestampTz), &found); + if (!found) *local_disk_quota_reject_map_last_overflow_report = 0; + /* for quota_info_map */ format_name("QuotaInfoMap", id, &str); memset(&hash_ctl, 0, sizeof(hash_ctl)); @@ -584,6 +583,9 @@ init_disk_quota_model(uint32 id) hash_ctl.keysize = sizeof(QuotaInfoEntryKey); quota_info_map = DiskquotaShmemInitHash(str.data, INIT_QUOTA_MAP_ENTRIES, MAX_QUOTA_MAP_ENTRIES, &hash_ctl, HASH_ELEM, DISKQUOTA_TAG_HASH); + format_name("QuotaInfoMap_last_overflow_report", id, &str); + quota_info_map_last_overflow_report = ShmemInitStruct(str.data, sizeof(TimestampTz), &found); + if (!found) *quota_info_map_last_overflow_report = 0; pfree(str.data); } @@ -607,6 +609,7 @@ vacuum_disk_quota_model(uint32 id) TableSizeEntry *tsentry = NULL; LocalRejectMapEntry *localrejectentry; QuotaInfoEntry *qentry; + bool found; HASHCTL hash_ctl; StringInfoData str; @@ -623,22 +626,27 @@ vacuum_disk_quota_model(uint32 id) while ((tsentry = hash_seq_search(&iter)) != NULL) { hash_search(table_size_map, &tsentry->key, HASH_REMOVE, NULL); - pg_atomic_fetch_sub_u32(diskquota_table_size_entry_num, 1); } + format_name("TableSizeEntrymap_last_overflow_report", id, &str); + table_size_map_last_overflow_report = ShmemInitStruct(str.data, sizeof(TimestampTz), &found); + if (!found) 
*table_size_map_last_overflow_report = 0; /* localrejectmap */ format_name("localrejectmap", id, &str); memset(&hash_ctl, 0, sizeof(hash_ctl)); hash_ctl.keysize = sizeof(RejectMapEntry); hash_ctl.entrysize = sizeof(LocalRejectMapEntry); local_disk_quota_reject_map = - DiskquotaShmemInitHash(str.data, MAX_LOCAL_DISK_QUOTA_REJECT_ENTRIES, MAX_LOCAL_DISK_QUOTA_REJECT_ENTRIES, + DiskquotaShmemInitHash(str.data, diskquota_max_local_reject_entries, diskquota_max_local_reject_entries, &hash_ctl, HASH_ELEM, DISKQUOTA_TAG_HASH); hash_seq_init(&iter, local_disk_quota_reject_map); while ((localrejectentry = hash_seq_search(&iter)) != NULL) { hash_search(local_disk_quota_reject_map, &localrejectentry->keyitem, HASH_REMOVE, NULL); } + format_name("localrejectmap_last_overflow_report", id, &str); + local_disk_quota_reject_map_last_overflow_report = ShmemInitStruct(str.data, sizeof(TimestampTz), &found); + if (!found) *local_disk_quota_reject_map_last_overflow_report = 0; /* quota_info_map */ format_name("QuotaInfoMap", id, &str); @@ -651,8 +659,10 @@ vacuum_disk_quota_model(uint32 id) while ((qentry = hash_seq_search(&iter)) != NULL) { hash_search(quota_info_map, &qentry->key, HASH_REMOVE, NULL); - pg_atomic_fetch_sub_u32(diskquota_quota_info_entry_num, 1); } + format_name("QuotaInfoMap_last_overflow_report", id, &str); + quota_info_map_last_overflow_report = ShmemInitStruct(str.data, sizeof(TimestampTz), &found); + if (!found) *quota_info_map_last_overflow_report = 0; pfree(str.data); } @@ -1002,45 +1012,32 @@ calculate_table_disk_usage(bool is_init, HTAB *local_active_table_stat_map) key.reloid = relOid; key.id = TableSizeEntryId(cur_segid); - uint32 counter = pg_atomic_read_u32(diskquota_table_size_entry_num); - if (counter > MAX_NUM_TABLE_SIZE_ENTRIES) + HASHACTION action = check_hash_fullness(table_size_map, MAX_NUM_TABLE_SIZE_ENTRIES, table_size_map_warning, + table_size_map_last_overflow_report); + tsentry = hash_search(table_size_map, &key, action, 
&table_size_map_found); + + if (!table_size_map_found) { - tsentry = (TableSizeEntry *)hash_search(table_size_map, &key, HASH_FIND, &table_size_map_found); - /* Too many tables have been added to the table_size_map, to avoid diskquota using - too much share memory, just quit the loop. The diskquota won't work correctly - anymore. */ - if (!table_size_map_found) + if (tsentry == NULL) { + /* Too many tables have been added to the table_size_map, to avoid diskquota using + too much share memory, just quit the loop. The diskquota won't work correctly + anymore. */ break; } - } - else - { - tsentry = (TableSizeEntry *)hash_search(table_size_map, &key, HASH_ENTER, &table_size_map_found); - if (!table_size_map_found) - { - counter = pg_atomic_add_fetch_u32(diskquota_table_size_entry_num, 1); - if (counter > MAX_NUM_TABLE_SIZE_ENTRIES) - { - ereport(WARNING, (errmsg("[diskquota] the number of tables exceeds the limit, please increase " - "the GUC value for diskquota.max_table_segments. Current " - "diskquota.max_table_segments value: %d", - diskquota_max_table_segments))); - } - tsentry->key.reloid = relOid; - tsentry->key.id = key.id; - Assert(TableSizeEntrySegidStart(tsentry) == cur_segid); - memset(tsentry->totalsize, 0, sizeof(tsentry->totalsize)); - tsentry->owneroid = InvalidOid; - tsentry->namespaceoid = InvalidOid; - tsentry->tablespaceoid = InvalidOid; - tsentry->flag = 0; - - int seg_st = TableSizeEntrySegidStart(tsentry); - int seg_ed = TableSizeEntrySegidEnd(tsentry); - for (int j = seg_st; j < seg_ed; j++) TableSizeEntrySetFlushFlag(tsentry, j); - } + tsentry->key.reloid = relOid; + tsentry->key.id = key.id; + Assert(TableSizeEntrySegidStart(tsentry) == cur_segid); + memset(tsentry->totalsize, 0, sizeof(tsentry->totalsize)); + tsentry->owneroid = InvalidOid; + tsentry->namespaceoid = InvalidOid; + tsentry->tablespaceoid = InvalidOid; + tsentry->flag = 0; + + int seg_st = TableSizeEntrySegidStart(tsentry); + int seg_ed = TableSizeEntrySegidEnd(tsentry); + for 
(int j = seg_st; j < seg_ed; j++) TableSizeEntrySetFlushFlag(tsentry, j); } /* mark tsentry is_exist */ @@ -1261,7 +1258,6 @@ flush_to_table_size(void) if (!get_table_size_entry_flag(tsentry, TABLE_EXIST)) { hash_search(table_size_map, &tsentry->key, HASH_REMOVE, NULL); - pg_atomic_fetch_sub_u32(diskquota_table_size_entry_num, 1); } } @@ -1305,13 +1301,12 @@ flush_local_reject_map(void) */ if (localrejectentry->isexceeded) { - rejectentry = (GlobalRejectMapEntry *)hash_search(disk_quota_reject_map, (void *)&localrejectentry->keyitem, - HASH_ENTER_NULL, &found); + HASHACTION action = + check_hash_fullness(disk_quota_reject_map, MAX_DISK_QUOTA_REJECT_ENTRIES, + disk_quota_reject_map_warning, &disk_quota_reject_map_last_overflow_report); + rejectentry = hash_search(disk_quota_reject_map, &localrejectentry->keyitem, action, &found); if (rejectentry == NULL) { - ereport(WARNING, (errmsg("[diskquota] Shared disk quota reject map size limit reached." - "Some out-of-limit schemas or roles will be lost" - "in rejectmap."))); continue; } /* new db objects which exceed quota limit */ @@ -2149,7 +2144,10 @@ refresh_rejectmap(PG_FUNCTION_ARGS) */ if (OidIsValid(rejectmapentry->keyitem.targetoid)) continue; - new_entry = hash_search(disk_quota_reject_map, &rejectmapentry->keyitem, HASH_ENTER_NULL, &found); + HASHACTION action = + check_hash_fullness(disk_quota_reject_map, MAX_DISK_QUOTA_REJECT_ENTRIES, disk_quota_reject_map_warning, + &disk_quota_reject_map_last_overflow_report); + new_entry = hash_search(disk_quota_reject_map, &rejectmapentry->keyitem, action, &found); if (!found && new_entry) memcpy(new_entry, rejectmapentry, sizeof(GlobalRejectMapEntry)); } LWLockRelease(diskquota_locks.reject_map_lock); diff --git a/src/relation_cache.c b/src/relation_cache.c index 647779de..b5624c42 100644 --- a/src/relation_cache.c +++ b/src/relation_cache.c @@ -32,6 +32,16 @@ HTAB *relation_cache = NULL; HTAB *relid_cache = NULL; +extern TimestampTz active_tables_map_last_overflow_report; 
+ +static const char *relation_cache_warning = + "the number of relation cache entries reached the limit, please increase " + "the GUC value for diskquota.max_active_tables."; + +static const char *relid_cache_warning = + "the number of relid cache entries reached the limit, please increase " + "the GUC value for diskquota.max_active_tables."; + static void update_relation_entry(Oid relid, DiskQuotaRelationCacheEntry *relation_entry, DiskQuotaRelidCacheEntry *relid_entry);
@@ -173,14 +183,36 @@ update_relation_cache(Oid relid)
 	DiskQuotaRelidCacheEntry relid_entry_data = {0};
 	DiskQuotaRelidCacheEntry *relid_entry;
 	Oid prelid;
+	HASHACTION action;
+	/* one report time per cache, so that each cache rate-limits its own overflow warning */
+	static TimestampTz relation_cache_last_overflow_report = 0;
+	static TimestampTz relid_cache_last_overflow_report = 0;
 
 	update_relation_entry(relid, &relation_entry_data, &relid_entry_data);
 
 	LWLockAcquire(diskquota_locks.relation_cache_lock, LW_EXCLUSIVE);
-	relation_entry = hash_search(relation_cache, &relation_entry_data.relid, HASH_ENTER, NULL);
+
+	action = check_hash_fullness(relation_cache, diskquota_max_active_tables, relation_cache_warning,
+	                             &relation_cache_last_overflow_report);
+	relation_entry = hash_search(relation_cache, &relation_entry_data.relid, action, NULL);
+
+	/* relation_cache is full and relid is not cached yet: nothing can be stored */
+	if (relation_entry == NULL)
+	{
+		LWLockRelease(diskquota_locks.relation_cache_lock);
+		return;
+	}
 	memcpy(relation_entry, &relation_entry_data, sizeof(DiskQuotaRelationCacheEntry));
 
-	relid_entry = hash_search(relid_cache, &relid_entry_data.relfilenode, HASH_ENTER, NULL);
+	action = check_hash_fullness(relid_cache, diskquota_max_active_tables, relid_cache_warning,
+	                             &relid_cache_last_overflow_report);
+	relid_entry = hash_search(relid_cache, &relid_entry_data.relfilenode, action, NULL);
+	/* relid_cache is full; NOTE(review): relation_cache was already updated above */
+	if (relid_entry == NULL)
+	{
+		LWLockRelease(diskquota_locks.relation_cache_lock);
+		return;
+	}
 	memcpy(relid_entry, &relid_entry_data, sizeof(DiskQuotaRelidCacheEntry));
 
 	LWLockRelease(diskquota_locks.relation_cache_lock);
diff --git a/tests/regress/diskquota_schedule b/tests/regress/diskquota_schedule index 82560063..05baeef3 100644 --- 
a/tests/regress/diskquota_schedule +++ b/tests/regress/diskquota_schedule @@ -32,6 +32,7 @@ test: test_appendonly test: test_rejectmap test: test_clean_rejectmap_after_drop test: test_rejectmap_mul_db +test: test_rejectmap_limit test: test_ctas_pause test: test_ctas_role test: test_ctas_schema diff --git a/tests/regress/expected/test_activetable_limit.out b/tests/regress/expected/test_activetable_limit.out index c556f32b..5b132120 100644 --- a/tests/regress/expected/test_activetable_limit.out +++ b/tests/regress/expected/test_activetable_limit.out @@ -1,5 +1,6 @@ -- table in 'diskquota not enabled database' should not be activetable -\! gpconfig -c diskquota.max_active_tables -v 2 > /dev/null +\! gpconfig -c diskquota.max_active_tables -v 5 > /dev/null +\! gpconfig -c diskquota.naptime -v 1 > /dev/null \! gpstop -arf > /dev/null \c CREATE DATABASE test_tablenum_limit_01; @@ -13,6 +14,10 @@ INSERT INTO a02 values(generate_series(0, 500)); INSERT INTO a03 values(generate_series(0, 500)); \c test_tablenum_limit_02 CREATE EXTENSION diskquota; +-- we only read the current log file +CREATE EXTERNAL WEB TABLE segment_logs(line text) + EXECUTE 'cat $GP_SEG_DATADIR/pg_log/$(ls -Art $GP_SEG_DATADIR/pg_log | tail -n 1)' + ON ALL FORMAT 'TEXT' (DELIMITER 'OFF'); CREATE SCHEMA s; SELECT diskquota.set_schema_quota('s', '1 MB'); set_schema_quota @@ -26,31 +31,54 @@ SELECT diskquota.wait_for_worker_new_epoch(); t (1 row) -CREATE TABLE s.t1(i int) DISTRIBUTED BY (i); -- activetable = 1 -INSERT INTO s.t1 SELECT generate_series(1, 100000); -- ok. diskquota soft limit does not check when first write +-- We create twice as many tables as the limit to ensure that the active_tables table is overflow. 
+CREATE TABLE s.t1 (a int, b int) DISTRIBUTED BY (a) + PARTITION BY RANGE (b) ( START (0) END (10) EVERY (1) ); +NOTICE: CREATE TABLE will create partition "t1_1_prt_1" for table "t1" +NOTICE: CREATE TABLE will create partition "t1_1_prt_2" for table "t1" +NOTICE: CREATE TABLE will create partition "t1_1_prt_3" for table "t1" +NOTICE: CREATE TABLE will create partition "t1_1_prt_4" for table "t1" +NOTICE: CREATE TABLE will create partition "t1_1_prt_5" for table "t1" +NOTICE: CREATE TABLE will create partition "t1_1_prt_6" for table "t1" +NOTICE: CREATE TABLE will create partition "t1_1_prt_7" for table "t1" +NOTICE: CREATE TABLE will create partition "t1_1_prt_8" for table "t1" +NOTICE: CREATE TABLE will create partition "t1_1_prt_9" for table "t1" +NOTICE: CREATE TABLE will create partition "t1_1_prt_10" for table "t1" +WARNING: [diskquota] the number of active tables reached the limit, please increase the GUC value for diskquota.max_active_tables. +SELECT count(*) FROM segment_logs WHERE line LIKE '%the number of active tables reached the limit%'; + count +------- + 3 +(1 row) + +CREATE TABLE s.t2(i int) DISTRIBUTED BY (i); +INSERT INTO s.t2 SELECT generate_series(1, 100000); SELECT diskquota.wait_for_worker_new_epoch(); wait_for_worker_new_epoch --------------------------- t (1 row) -CREATE TABLE s.t2(i int) DISTRIBUTED BY (i); -- activetable = 2 -INSERT INTO s.t2 SELECT generate_series(1, 10); -- expect failed -ERROR: schema's disk space quota exceeded with name: s -CREATE TABLE s.t3(i int) DISTRIBUTED BY (i); -- activetable = 3 should not crash. -INSERT INTO s.t3 SELECT generate_series(1, 10); -- expect failed -ERROR: schema's disk space quota exceeded with name: s --- Q: why diskquota still works when activetable = 3? 
--- A: the activetable limit by shmem size, calculate by hash_estimate_size() --- the result will bigger than sizeof(DiskQuotaActiveTableEntry) * max_active_tables --- the real capacity of this data structure based on the hash conflict probability. --- so we can not predict when the data structure will be fill in fully. --- --- this test case is useless, remove this if anyone dislike it. --- but the hash capacity is smaller than 6, so the test case works for issue 51 +INSERT INTO s.t1 SELECT a, a from generate_series(0, 9)a; -- should be successful +SELECT count(*) FROM s.t1; + count +------- + 10 +(1 row) + +-- altered reloid cache overflow check. expected warning. +VACUUM FULL; +WARNING: [diskquota] the number of altered reloid cache entries reached the limit, please increase the GUC value for diskquota.max_active_tables. +SELECT count(*) FROM segment_logs WHERE line LIKE '%the number of altered reloid cache entries reached the limit%'; + count +------- + 3 +(1 row) + DROP EXTENSION diskquota; \c contrib_regression DROP DATABASE test_tablenum_limit_01; DROP DATABASE test_tablenum_limit_02; \! gpconfig -r diskquota.max_active_tables > /dev/null +\! gpconfig -c diskquota.naptime -v 0 > /dev/null \! gpstop -arf > /dev/null diff --git a/tests/regress/expected/test_rejectmap_limit.out b/tests/regress/expected/test_rejectmap_limit.out new file mode 100644 index 00000000..8e1c0eaa --- /dev/null +++ b/tests/regress/expected/test_rejectmap_limit.out @@ -0,0 +1,87 @@ +-- +-- This file contains tests for limiting reject map +-- +\! gpconfig -c diskquota.max_reject_entries -v 4 > /dev/null +\! 
gpstop -arf > /dev/null +\c +CREATE DATABASE test_reject_map_limit_01; +\c test_reject_map_limit_01 +CREATE EXTENSION diskquota; +SELECT diskquota.wait_for_worker_new_epoch(); + wait_for_worker_new_epoch +--------------------------- + t +(1 row) + +-- we only read the current log file +CREATE EXTERNAL WEB TABLE master_log(line text) + EXECUTE 'cat $GP_SEG_DATADIR/pg_log/$(ls -Art $GP_SEG_DATADIR/pg_log | tail -n 1)' + ON MASTER FORMAT 'TEXT' (DELIMITER 'OFF'); +CREATE SCHEMA s1; +CREATE SCHEMA s2; +CREATE SCHEMA s3; +CREATE SCHEMA s4; +CREATE SCHEMA s5; +SELECT diskquota.set_schema_quota('s1', '1 MB'); + set_schema_quota +------------------ + +(1 row) + +SELECT diskquota.set_schema_quota('s2', '1 MB'); + set_schema_quota +------------------ + +(1 row) + +SELECT diskquota.set_schema_quota('s3', '1 MB'); + set_schema_quota +------------------ + +(1 row) + +SELECT diskquota.set_schema_quota('s4', '1 MB'); + set_schema_quota +------------------ + +(1 row) + +SELECT diskquota.set_schema_quota('s5', '1 MB'); + set_schema_quota +------------------ + +(1 row) + +SELECT diskquota.wait_for_worker_new_epoch(); + wait_for_worker_new_epoch +--------------------------- + t +(1 row) + +CREATE TABLE s1.a(i int) DISTRIBUTED BY (i); +CREATE TABLE s2.a(i int) DISTRIBUTED BY (i); +CREATE TABLE s3.a(i int) DISTRIBUTED BY (i); +CREATE TABLE s4.a(i int) DISTRIBUTED BY (i); +CREATE TABLE s5.a(i int) DISTRIBUTED BY (i); +INSERT INTO s1.a SELECT generate_series(1,100000); +INSERT INTO s2.a SELECT generate_series(1,100000); +INSERT INTO s3.a SELECT generate_series(1,100000); +INSERT INTO s4.a SELECT generate_series(1,100000); +INSERT INTO s5.a SELECT generate_series(1,100000); +SELECT diskquota.wait_for_worker_new_epoch(); + wait_for_worker_new_epoch +--------------------------- + t +(1 row) + +SELECT count(*) FROM master_log WHERE line LIKE '%the number of local quota reject map entries reached the limit%' AND line NOT LIKE '%LOG%'; + count +------- + 1 +(1 row) + +DROP EXTENSION diskquota; 
+\c contrib_regression +DROP DATABASE test_reject_map_limit_01; +\! gpconfig -r diskquota.max_reject_entries > /dev/null +\! gpstop -arf > /dev/null diff --git a/tests/regress/sql/test_activetable_limit.sql b/tests/regress/sql/test_activetable_limit.sql index 9ab6666a..601d7111 100644 --- a/tests/regress/sql/test_activetable_limit.sql +++ b/tests/regress/sql/test_activetable_limit.sql @@ -1,5 +1,6 @@ -- table in 'diskquota not enabled database' should not be activetable -\! gpconfig -c diskquota.max_active_tables -v 2 > /dev/null +\! gpconfig -c diskquota.max_active_tables -v 5 > /dev/null +\! gpconfig -c diskquota.naptime -v 1 > /dev/null \! gpstop -arf > /dev/null \c @@ -19,29 +20,34 @@ INSERT INTO a03 values(generate_series(0, 500)); \c test_tablenum_limit_02 CREATE EXTENSION diskquota; +-- we only read the current log file +CREATE EXTERNAL WEB TABLE segment_logs(line text) + EXECUTE 'cat $GP_SEG_DATADIR/pg_log/$(ls -Art $GP_SEG_DATADIR/pg_log | tail -n 1)' + ON ALL FORMAT 'TEXT' (DELIMITER 'OFF'); + CREATE SCHEMA s; SELECT diskquota.set_schema_quota('s', '1 MB'); SELECT diskquota.wait_for_worker_new_epoch(); -CREATE TABLE s.t1(i int) DISTRIBUTED BY (i); -- activetable = 1 -INSERT INTO s.t1 SELECT generate_series(1, 100000); -- ok. diskquota soft limit does not check when first write +-- We create twice as many tables as the limit to ensure that the active_tables table is overflow. +CREATE TABLE s.t1 (a int, b int) DISTRIBUTED BY (a) + PARTITION BY RANGE (b) ( START (0) END (10) EVERY (1) ); + +SELECT count(*) FROM segment_logs WHERE line LIKE '%the number of active tables reached the limit%'; + +CREATE TABLE s.t2(i int) DISTRIBUTED BY (i); +INSERT INTO s.t2 SELECT generate_series(1, 100000); SELECT diskquota.wait_for_worker_new_epoch(); -CREATE TABLE s.t2(i int) DISTRIBUTED BY (i); -- activetable = 2 -INSERT INTO s.t2 SELECT generate_series(1, 10); -- expect failed -CREATE TABLE s.t3(i int) DISTRIBUTED BY (i); -- activetable = 3 should not crash. 
-INSERT INTO s.t3 SELECT generate_series(1, 10); -- expect failed +INSERT INTO s.t1 SELECT a, a from generate_series(0, 9)a; -- should be successful +SELECT count(*) FROM s.t1; + +-- altered reloid cache overflow check. expected warning. +VACUUM FULL; --- Q: why diskquota still works when activetable = 3? --- A: the activetable limit by shmem size, calculate by hash_estimate_size() --- the result will bigger than sizeof(DiskQuotaActiveTableEntry) * max_active_tables --- the real capacity of this data structure based on the hash conflict probability. --- so we can not predict when the data structure will be fill in fully. --- --- this test case is useless, remove this if anyone dislike it. --- but the hash capacity is smaller than 6, so the test case works for issue 51 +SELECT count(*) FROM segment_logs WHERE line LIKE '%the number of altered reloid cache entries reached the limit%'; DROP EXTENSION diskquota; @@ -50,4 +56,5 @@ DROP DATABASE test_tablenum_limit_01; DROP DATABASE test_tablenum_limit_02; \! gpconfig -r diskquota.max_active_tables > /dev/null +\! gpconfig -c diskquota.naptime -v 0 > /dev/null \! gpstop -arf > /dev/null diff --git a/tests/regress/sql/test_rejectmap_limit.sql b/tests/regress/sql/test_rejectmap_limit.sql new file mode 100644 index 00000000..8bed5cf9 --- /dev/null +++ b/tests/regress/sql/test_rejectmap_limit.sql @@ -0,0 +1,55 @@ +-- +-- This file contains tests for limiting reject map +-- + +\! gpconfig -c diskquota.max_reject_entries -v 4 > /dev/null +\! 
gpstop -arf > /dev/null + +\c + +CREATE DATABASE test_reject_map_limit_01; + +\c test_reject_map_limit_01 +CREATE EXTENSION diskquota; +SELECT diskquota.wait_for_worker_new_epoch(); +-- we only read the current log file +CREATE EXTERNAL WEB TABLE master_log(line text) + EXECUTE 'cat $GP_SEG_DATADIR/pg_log/$(ls -Art $GP_SEG_DATADIR/pg_log | tail -n 1)' + ON MASTER FORMAT 'TEXT' (DELIMITER 'OFF'); + +CREATE SCHEMA s1; +CREATE SCHEMA s2; +CREATE SCHEMA s3; +CREATE SCHEMA s4; +CREATE SCHEMA s5; + +SELECT diskquota.set_schema_quota('s1', '1 MB'); +SELECT diskquota.set_schema_quota('s2', '1 MB'); +SELECT diskquota.set_schema_quota('s3', '1 MB'); +SELECT diskquota.set_schema_quota('s4', '1 MB'); +SELECT diskquota.set_schema_quota('s5', '1 MB'); +SELECT diskquota.wait_for_worker_new_epoch(); + +CREATE TABLE s1.a(i int) DISTRIBUTED BY (i); +CREATE TABLE s2.a(i int) DISTRIBUTED BY (i); +CREATE TABLE s3.a(i int) DISTRIBUTED BY (i); +CREATE TABLE s4.a(i int) DISTRIBUTED BY (i); +CREATE TABLE s5.a(i int) DISTRIBUTED BY (i); + +INSERT INTO s1.a SELECT generate_series(1,100000); +INSERT INTO s2.a SELECT generate_series(1,100000); +INSERT INTO s3.a SELECT generate_series(1,100000); +INSERT INTO s4.a SELECT generate_series(1,100000); +INSERT INTO s5.a SELECT generate_series(1,100000); + +SELECT diskquota.wait_for_worker_new_epoch(); + +SELECT count(*) FROM master_log WHERE line LIKE '%the number of local quota reject map entries reached the limit%' AND line NOT LIKE '%LOG%'; + +DROP EXTENSION diskquota; + +\c contrib_regression +DROP DATABASE test_reject_map_limit_01; + +\! gpconfig -r diskquota.max_reject_entries > /dev/null +\! gpstop -arf > /dev/null