diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index 40ccdbaade9..8ce962de422 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -1875,7 +1875,7 @@ SCRAM-SHA-256$<iteration count>:<salt>< p = permanent table, u = unlogged table, - t = temporary table + t = temporary table, f = fast temporary table diff --git a/src/backend/access/common/Makefile b/src/backend/access/common/Makefile index 88aa8a59daa..e6515f604bd 100644 --- a/src/backend/access/common/Makefile +++ b/src/backend/access/common/Makefile @@ -12,7 +12,7 @@ subdir = src/backend/access/common top_builddir = ../../../.. include $(top_builddir)/src/Makefile.global -OBJS = heaptuple.o indextuple.o printtup.o reloptions.o scankey.o \ +OBJS = fasttab.o heaptuple.o indextuple.o printtup.o reloptions.o scankey.o \ tupconvert.o tupdesc.o OBJS += bufmask.o memtuple.o reloptions_gp.o diff --git a/src/backend/access/common/fasttab.c b/src/backend/access/common/fasttab.c new file mode 100644 index 00000000000..b43a1fdf2b5 --- /dev/null +++ b/src/backend/access/common/fasttab.c @@ -0,0 +1,1896 @@ +/*------------------------------------------------------------------------- + * + * fasttab.c + * virtual catalog and fast temporary tables + * + * This file contains the implementation of a special type of temporary tables --- + * fast temporary tables (FTT). From the user's perspective they work exactly as + * regular temporary tables. However there are no records about FTTs in + * pg_catalog. These records are stored in backend's memory instead and mixed + * with regular records during scans of catalog tables. We refer to + * corresponding tuples of catalog tables as "in-memory" or "virtual" tuples + * and to all these tuples together --- as "in-memory" or "virtual" catalog. + * + * Note that since temporary tables are visible only in one session there is + * no need to use shared memory or locks for FTTs. Transaction support is + * very simple too. 
There is no need to track xmin/xmax, etc.
+ *
+ * FTTs are designed to solve the pg_catalog bloating problem. There are
+ * applications that create and delete a lot of temporary tables. It causes
+ * bloating of pg_catalog and running auto vacuum on it. It's quite an
+ * expensive operation that affects entire database performance.
+ *
+ * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *	  src/backend/access/common/fasttab.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "c.h"
+#include "postgres.h"
+#include "pgstat.h"
+#include "miscadmin.h"
+#include "access/amapi.h"
+#include "access/fasttab.h"
+#include "access/relscan.h"
+#include "access/valid.h"
+#include "access/sysattr.h"
+#include "access/htup_details.h"
+#include "catalog/pg_class.h"
+#include "catalog/pg_type.h"
+#include "catalog/pg_depend.h"
+#include "catalog/pg_inherits.h"
+#include "catalog/pg_statistic.h"
+#include "storage/bufmgr.h"
+#include "utils/rel.h"
+#include "utils/inval.h"
+#include "utils/memutils.h"
+
+/*****************************************************************************
+		  TYPEDEFS, MACRO DECLARATIONS AND CONST STATIC VARIABLES
+ *****************************************************************************/
+
+/* #define FASTTAB_DEBUG 1 */
+
+#ifdef FASTTAB_DEBUG
+static int32 fasttab_scan_tuples_counter = -1;
+#endif
+
+/*
+ * Element of a list of in-memory tuples.
+ *
+ * NB: `node` must stay the first member. Code in this file converts a
+ * dlist_node pointer to a DListHeapTuple with a plain cast.
+ */
+typedef struct
+{
+	dlist_node	node;
+	HeapTuple	tup;
+} DListHeapTupleData;
+
+typedef DListHeapTupleData *DListHeapTuple;
+
+/*
+ * Like strcmp but for integer types --- int, uint32, Oid, etc.
+ *
+ * NB: both arguments are evaluated more than once, so they must not have
+ * side effects.
+ */
+#define FasttabCompareInts(x, y) ( (x) == (y) ? 0 : ( (x) > (y) ? 1 : -1 ))
+
+/* Forward declaration is required for relation_is_inmem_tuple_function */
+struct FasttabSnapshotData;
+typedef struct FasttabSnapshotData *FasttabSnapshot;
+
+/*
+ * Predicate that determines whether given tuple should be stored in-memory.
+ * `tableIdx` is the index of `relation` in FasttabRelationMethodsTable.
+ */
+typedef bool (*relation_is_inmem_tuple_function)
+			(Relation relation, HeapTuple tup, FasttabSnapshot fasttab_snapshot,
+			 int tableIdx);
+
+/* Capacity of FasttabRelationMethods->attrNumbers, see below */
+#define FasttabRelationMaxOidAttributes 2
+
+/* FasttabRelationMethodsTable entry */
+typedef const struct
+{
+	/* relation oid */
+	Oid			relationId;
+	/* predicate that determines whether tuple should be stored in-memory */
+	relation_is_inmem_tuple_function is_inmem_tuple_fn;
+	/* number of valid entries in attrNumbers array */
+	AttrNumber	noidattr;
+	/* attributes that refer to pg_class records */
+	AttrNumber	attrNumbers[FasttabRelationMaxOidAttributes];
+} FasttabRelationMethodsData;
+
+typedef FasttabRelationMethodsData const *FasttabRelationMethods;
+
+/* Forward declaration of all possible is_inmem_tuple_fn values */
+static bool generic_is_inmem_tuple(Relation relation, HeapTuple tup,
+					   FasttabSnapshot fasttab_snapshot, int tableIdx);
+static bool pg_class_is_inmem_tuple(Relation relation, HeapTuple tup,
+						FasttabSnapshot fasttab_snapshot, int tableIdx);
+
+/*
+ * Static information necessary to determine whether given tuple of given
+ * relation should be stored in-memory or not.
+ *
+ * NB: Keep this array sorted by relationId.
+ */
+static FasttabRelationMethodsData FasttabRelationMethodsTable[] =
+{
+	/* 1247, pg_type */
+	{TypeRelationId, &generic_is_inmem_tuple, 1,
+		{Anum_pg_type_typrelid, 0}
+	},
+	/* 1249, pg_attribute */
+	{AttributeRelationId, &generic_is_inmem_tuple, 1,
+		{Anum_pg_attribute_attrelid, 0}
+	},
+	/* 1259, pg_class (uses its own predicate, no oid attributes listed) */
+	{RelationRelationId, &pg_class_is_inmem_tuple, 0,
+		{0, 0}
+	},
+	/* 2608, pg_depend */
+	{DependRelationId, &generic_is_inmem_tuple, 2,
+		{Anum_pg_depend_objid, Anum_pg_depend_refobjid}
+	},
+	/* 2611, pg_inherits */
+	{InheritsRelationId, &generic_is_inmem_tuple, 2,
+		{Anum_pg_inherits_inhrelid, Anum_pg_inherits_inhparent}
+	},
+	/* 2619, pg_statistic */
+	{StatisticRelationId, &generic_is_inmem_tuple, 1,
+		{Anum_pg_statistic_starelid, 0}
+	},
+};
+
+/* Number of tables that can have a virtual part */
+#define FasttabSnapshotTablesNumber (lengthof(FasttabRelationMethodsTable))
+
+/* Possible values of FasttabIndexMethods->attrCompareMethod[], see below */
+typedef enum FasttabCompareMethod
+{
+	CompareInvalid,				/* invalid value, marks unused slots */
+	CompareOid,					/* compare attributes as oids */
+	CompareCString,				/* compare attributes as strings */
+	CompareInt16,				/* compare attributes as int16's */
+	CompareInt64,				/* compare attributes as int64's */
+	CompareBoolean,				/* compare attributes as booleans */
+} FasttabCompareMethod;
+
+/* Capacity of FasttabIndexMethods->attrNumbers, see below */
+#define FasttabIndexMaxAttributes 3
+
+/*
+ * FasttabIndexMethodsTable entry.
+ *
+ * NB: typedef is located in fasttab.h
+ */
+struct FasttabIndexMethodsData
+{
+	/* index oid */
+	Oid			indexId;
+	/* number of indexed attributes, i.e. valid entries in the arrays below */
+	AttrNumber	nattr;
+	/* indexed attributes (NB: attribute number can be negative) */
+	AttrNumber	attrNumbers[FasttabIndexMaxAttributes];
+	/* how to compare attributes, parallel to attrNumbers */
+	FasttabCompareMethod attrCompareMethod[FasttabIndexMaxAttributes];
+};
+
+/*
+ * Static information required for sorting virtual tuples during index scans.
+ *
+ * NB: Keep this array sorted by indexId. GetFasttabIndexMethodsInternal
+ * looks entries up with a binary search.
+ *
+ * NB: Uniqueness information is currently not used. Still please keep
+ * comments regarding uniqueness, for possible use in the future.
+ */
+static FasttabIndexMethodsData FasttabIndexMethodsTable[] =
+{
+	/* 2187, non-unique */
+	{InheritsParentIndexId, 1,
+		{Anum_pg_inherits_inhparent, 0, 0},
+		{CompareOid, CompareInvalid, CompareInvalid}
+	},
+	/* 2658, unique */
+	{AttributeRelidNameIndexId, 2,
+		{Anum_pg_attribute_attrelid, Anum_pg_attribute_attname, 0},
+		{CompareOid, CompareCString, CompareInvalid}
+	},
+	/* 2659, unique */
+	{AttributeRelidNumIndexId, 2,
+		{Anum_pg_attribute_attrelid, Anum_pg_attribute_attnum, 0},
+		{CompareOid, CompareInt16, CompareInvalid}
+	},
+	/* 2662, unique */
+	{ClassOidIndexId, 1,
+		{ObjectIdAttributeNumber, 0, 0},
+		{CompareOid, CompareInvalid, CompareInvalid}
+	},
+	/* 2663, unique */
+	{ClassNameNspIndexId, 2,
+		{Anum_pg_class_relname, Anum_pg_class_relnamespace, 0},
+		{CompareCString, CompareOid, CompareInvalid}
+	},
+	/* 2673, non-unique */
+	{DependDependerIndexId, 3,
+		{Anum_pg_depend_classid, Anum_pg_depend_objid, Anum_pg_depend_objsubid},
+		{CompareOid, CompareOid, CompareInt64}
+	},
+	/* 2674, non-unique */
+	{DependReferenceIndexId, 3,
+		{Anum_pg_depend_refclassid, Anum_pg_depend_refobjid,
+		Anum_pg_depend_refobjsubid},
+		{CompareOid, CompareOid, CompareInt64}
+	},
+	/* 2680, unique */
+	{InheritsRelidSeqnoIndexId, 2,
+		{Anum_pg_inherits_inhrelid, Anum_pg_inherits_inhseqno, 0},
+		{CompareOid, CompareOid, CompareInvalid}
+	},
+	/* 2696, unique */
+	{StatisticRelidAttnumInhIndexId, 3,
+		{Anum_pg_statistic_starelid, Anum_pg_statistic_staattnum,
+		Anum_pg_statistic_stainherit},
+		{CompareOid, CompareInt16, CompareBoolean}
+	},
+	/* 2703, unique */
+	{TypeOidIndexId, 1,
+		{ObjectIdAttributeNumber, 0, 0},
+		{CompareOid, CompareInvalid, CompareInvalid}
+	},
+	/* 2704, unique */
+	{TypeNameNspIndexId, 2,
+		{Anum_pg_type_typname, Anum_pg_type_typnamespace, 0},
+		{CompareCString, CompareOid, CompareInvalid}
+	},
+	/* 3455, 
non-unique */
+	{ClassTblspcRelfilenodeIndexId, 2,
+		{Anum_pg_class_reltablespace, Anum_pg_class_relfilenode, 0},
+		{CompareOid, CompareOid, CompareInvalid}
+	},
+};
+
+/* List of virtual tuples of single relation */
+typedef struct
+{
+	int			tuples_num;		/* number of virtual tuples */
+	dlist_head	tuples;			/* list of virtual tuples */
+} FasttabSnapshotRelationData;
+
+/*
+ * Snapshot represents state of virtual heap for current transaction or
+ * savepoint. Snapshots form a stack linked through `prev`; the snapshot
+ * whose `prev` is NULL is the root one.
+ *
+ * NB: this is a typedef on purpose. Without the `typedef` keyword the
+ * trailing identifier would define a global variable of this type rather
+ * than a type name.
+ */
+typedef struct FasttabSnapshotData
+{
+	/* Previous snapshot to rollback to. */
+	struct FasttabSnapshotData *prev;
+	/* Optional name of a savepoint. Can be NULL. */
+	char	   *name;
+	/* State of relations that can contain virtual tuples */
+	FasttabSnapshotRelationData relationData[FasttabSnapshotTablesNumber];
+} FasttabSnapshotData;
+
+/* Determine whether given snapshot is a root snapshot. */
+#define FasttabSnapshotIsRoot(sn) ( !PointerIsValid((sn)->prev) )
+
+/* Determine whether given snapshot is anonymous. */
+#define FasttabSnapshotIsAnonymous(sn) ( !PointerIsValid((sn)->name) )
+
+/* Determine whether there is a transaction in progress. 
*/
+#define FasttabTransactionInProgress() \
+	( PointerIsValid(FasttabSnapshotGetCurrent()->prev))
+
+/*****************************************************************************
+							 GLOBAL VARIABLES
+ *****************************************************************************/
+
+/* Memory context used to store virtual catalog, see GetLocalMemoryContext */
+static MemoryContext LocalMemoryContextPrivate = NULL;
+
+/* Counters used to generate unique virtual ItemPointers */
+static uint32 CurrentFasttabBlockId = 0;
+static uint16 CurrentFasttabOffset = 1; /* NB: 0 is considered invalid */
+
+/* Current (topmost) snapshot, see FasttabSnapshotGetCurrent */
+static FasttabSnapshot CurrentFasttabSnapshotPrivate = NULL;
+
+/* Current relpersistence hint value */
+static char CurrentRelpersistenceHint = RELPERSISTENCE_UNDEFINED;
+
+/*****************************************************************************
+							UTILITY PROCEDURES
+ *****************************************************************************/
+
+/*
+ * Set relpersistence hint.
+ *
+ * Usually to figure out whether a tuple should be stored in-memory or not we
+ * use the in-memory part of pg_class table. Unfortunately during table
+ * creation some tuples are stored in catalog tables _before_ modification of
+ * pg_class table. So there is no way to tell that these tuples should be
+ * in-memory.
+ *
+ * In these rare cases we set a hint with the relpersistence value of the
+ * table we are about to create. This not only solves the problem described
+ * above but also allows to run the described check much faster.
+ */
+void
+fasttab_set_relpersistence_hint(char relpersistence)
+{
+	CurrentRelpersistenceHint = relpersistence;
+}
+
+/*
+ * Clear relpersistence hint, i.e. reset it to RELPERSISTENCE_UNDEFINED.
+ */
+void
+fasttab_clear_relpersistence_hint(void)
+{
+	CurrentRelpersistenceHint = RELPERSISTENCE_UNDEFINED;
+}
+
+/*
+ * Get memory context for storing virtual catalog. Create one if necessary.
+ */
+static MemoryContext
+GetLocalMemoryContext(void)
+{
+	/* Fast path: the context was created by an earlier call. */
+	if (PointerIsValid(LocalMemoryContextPrivate))
+		return LocalMemoryContextPrivate;
+
+	/* First use: create the context (NULL is passed as its parent). */
+	LocalMemoryContextPrivate =
+		AllocSetContextCreate(NULL,
+							  "Virtual catalog memory context",
+							  ALLOCSET_DEFAULT_MINSIZE,
+							  ALLOCSET_DEFAULT_INITSIZE,
+							  ALLOCSET_DEFAULT_MAXSIZE);
+
+	return LocalMemoryContextPrivate;
+}
+
+/*
+ * Generate unique virtual ItemPointer.
+ *
+ * The pointer is built from the current block/offset counters, with
+ * FASTTAB_ITEM_POINTER_BIT set in the offset part. The counters are then
+ * advanced: the offset wraps back to 1 and the block number is incremented
+ * once the offset exceeds MaxHeapTuplesPerPage.
+ */
+static ItemPointerData
+GenFasttabItemPointerData(void)
+{
+	ItemPointerData tid;
+
+	BlockIdSet(&(tid.ip_blkid), CurrentFasttabBlockId);
+	tid.ip_posid = CurrentFasttabOffset | FASTTAB_ITEM_POINTER_BIT;
+
+	/* Advance the counters for the next call. */
+	if (++CurrentFasttabOffset <= MaxHeapTuplesPerPage)
+		return tid;
+
+	/* Offset overflowed the "page": move on to the next virtual block. */
+	CurrentFasttabOffset = 1;
+	CurrentFasttabBlockId++;
+
+#ifdef FASTTAB_DEBUG
+	elog(NOTICE, "FASTTAB: GenFasttabItemPointerData, CurrentFasttabOffset > MaxHeapTuplesPerPage (%d), new values - CurrentFasttabOffset = %d, CurrentFasttabBlockId = %d",
+		 MaxHeapTuplesPerPage, CurrentFasttabOffset, CurrentFasttabBlockId);
+#endif
+
+	return tid;
+}
+
+/*
+ * Find FasttabRelationMethodsTable index by relation oid using binary search.
+ *
+ * Not for direct usage. GetSnapshotRelationIdxByOid should be used instead.
+ *
+ * Return values:
+ * == -1 - not found
+ * >=  0 - position of the matching entry in FasttabRelationMethodsTable
+ */
+static int
+GetSnapshotRelationIdxByOidInternal(Oid relId)
+{
+	int			lo = 0;
+	int			hi = FasttabSnapshotTablesNumber - 1;
+
+#ifdef USE_ASSERT_CHECKING
+	/* Test that FasttabRelationMethodsTable is properly sorted */
+	int			pos;
+
+	for (pos = 0; pos <= hi; pos++)
+	{
+		Assert(PointerIsValid(FasttabRelationMethodsTable[pos].is_inmem_tuple_fn));
+		Assert(pos == 0 ||
+			   FasttabRelationMethodsTable[pos - 1].relationId < FasttabRelationMethodsTable[pos].relationId);
+	}
+#endif
+
+	/* Narrow [lo, hi] until at most one candidate is left. */
+	while (lo < hi)
+	{
+		int			mid = (lo + hi) / 2;
+		Oid			midId = FasttabRelationMethodsTable[mid].relationId;
+
+		if (midId == relId)
+			return mid;			/* found */
+
+		if (midId < relId)
+			lo = mid + 1;		/* go right */
+		else
+			hi = mid - 1;		/* go left */
+	}
+
+	/* `lo` is the only remaining candidate. */
+	return (FasttabRelationMethodsTable[lo].relationId == relId) ? lo : -1;
+}
+
+/*
+ * Determine FasttabRelationMethodsTable index by relation oid.
+ *
+ * The relation must be one of the relations listed in
+ * FasttabRelationMethodsTable; this is checked by assertions only.
+ */
+static inline int
+GetSnapshotRelationIdxByOid(Oid relId)
+{
+	int			idx;
+
+	Assert(IsFasttabHandledRelationId(relId));
+	idx = GetSnapshotRelationIdxByOidInternal(relId);
+	Assert(idx >= 0 && idx < FasttabSnapshotTablesNumber);
+
+	return idx;
+}
+
+/*
+ * Determine whether relation with given oid can have virtual tuples, i.e.
+ * whether it is listed in FasttabRelationMethodsTable.
+ */
+bool
+IsFasttabHandledRelationId(Oid relId)
+{
+	int			idx = GetSnapshotRelationIdxByOidInternal(relId);
+
+	return (idx >= 0);
+}
+
+/*
+ * Find FasttabIndexMethodsTable entry by index oid using binary search.
+ *
+ * Not for direct usage. GetFasttabIndexMethods should be used instead.
+ *
+ * Return values:
+ * == NULL - not found
+ * != NULL - pointer to the matching FasttabIndexMethodsTable entry
+ */
+static FasttabIndexMethods
+GetFasttabIndexMethodsInternal(Oid indexId)
+{
+	int			lo = 0;
+	int			hi = lengthof(FasttabIndexMethodsTable) - 1;
+
+#ifdef USE_ASSERT_CHECKING
+	/* Test that FasttabIndexMethodsTable is properly sorted. */
+	int			pos;
+
+	for (pos = 1; pos <= hi; pos++)
+		Assert(FasttabIndexMethodsTable[pos - 1].indexId < FasttabIndexMethodsTable[pos].indexId);
+#endif
+
+	/* Narrow [lo, hi] until at most one candidate is left. */
+	while (lo < hi)
+	{
+		int			mid = (lo + hi) / 2;
+		Oid			midId = FasttabIndexMethodsTable[mid].indexId;
+
+		if (midId == indexId)
+			return &FasttabIndexMethodsTable[mid];	/* found */
+
+		if (midId < indexId)
+			lo = mid + 1;		/* go right */
+		else
+			hi = mid - 1;		/* go left */
+	}
+
+	/* `lo` is the only remaining candidate. */
+	if (FasttabIndexMethodsTable[lo].indexId != indexId)
+		return NULL;			/* not found */
+
+	return &FasttabIndexMethodsTable[lo];
+}
+
+/*
+ * Determine whether index with given oid has a virtual part, i.e. whether
+ * it is listed in FasttabIndexMethodsTable.
+ */
+bool
+IsFasttabHandledIndexId(Oid indexId)
+{
+	FasttabIndexMethods methods = GetFasttabIndexMethodsInternal(indexId);
+
+	return (methods != NULL);
+}
+
+/*
+ * Find FasttabIndexMethodsTable entry by index oid using binary search.
+ *
+ * The index must be one of the indexes listed in FasttabIndexMethodsTable;
+ * this is checked by an assertion only.
+ */
+static inline FasttabIndexMethods
+GetFasttabIndexMethods(Oid indexId)
+{
+	Assert(IsFasttabHandledIndexId(indexId));
+
+	return GetFasttabIndexMethodsInternal(indexId);
+}
+
+/*
+ * Free single DListHeapTuple: both the wrapped tuple and the list element.
+ * The element is not unlinked from any list here.
+ */
+static void
+DListHeapTupleFree(DListHeapTuple dlist_tup)
+{
+	HeapTuple	wrapped = dlist_tup->tup;
+
+	heap_freetuple(wrapped);
+	pfree(dlist_tup);
+}
+
+/*
+ * Free list of DListHeapTuple's, leaving the list empty.
+ */
+static void
+FasttabDListFree(dlist_head *head)
+{
+	for (;;)
+	{
+		dlist_node *first;
+
+		if (dlist_is_empty(head))
+			break;
+
+		/* Unlink the first element, then release it. */
+		first = dlist_pop_head_node(head);
+		DListHeapTupleFree((DListHeapTuple) first);
+	}
+}
+
+/*
+ * Create a new empty snapshot.
+ */
+static FasttabSnapshot
+FasttabSnapshotCreateEmpty(void)
+{
+	/* Zero-filled allocation in the virtual catalog memory context. */
+	return MemoryContextAllocZero(GetLocalMemoryContext(),
+								  sizeof(FasttabSnapshotData));
+}
+
+/*
+ * Create a snapshot copy.
+ *
+ * The copy gets its own copies of every virtual tuple of `src` and is named
+ * `dst_name` (which can be NULL for an anonymous snapshot). Everything is
+ * allocated in the virtual catalog memory context. The `prev` link of the
+ * copy is left unset (NULL).
+ */
+static FasttabSnapshot
+FasttabSnapshotCopy(FasttabSnapshot src, const char *dst_name)
+{
+	FasttabSnapshot copy = FasttabSnapshotCreateEmpty();
+	MemoryContext savedctx = MemoryContextSwitchTo(GetLocalMemoryContext());
+	int			rel;
+
+	if (dst_name)
+		copy->name = pstrdup(dst_name);
+	else
+		copy->name = NULL;
+
+	for (rel = 0; rel < FasttabSnapshotTablesNumber; rel++)
+	{
+		FasttabSnapshotRelationData *from = &src->relationData[rel];
+		FasttabSnapshotRelationData *to = &copy->relationData[rel];
+		dlist_iter	iter;
+
+		to->tuples_num = from->tuples_num;
+
+		/* Deep-copy the tuples, preserving their order. */
+		dlist_foreach(iter, &from->tuples)
+		{
+			DListHeapTuple src_entry = (DListHeapTuple) iter.cur;
+			DListHeapTuple dst_entry = palloc(sizeof(DListHeapTupleData));
+
+			dst_entry->tup = heap_copytuple(src_entry->tup);
+			dlist_push_tail(&to->tuples, &dst_entry->node);
+		}
+	}
+
+	MemoryContextSwitchTo(savedctx);
+	return copy;
+}
+
+/*
+ * Free snapshot: all its virtual tuples, its name (if any) and the snapshot
+ * structure itself.
+ */
+static void
+FasttabSnapshotFree(FasttabSnapshot fasttab_snapshot)
+{
+	int			rel = 0;
+
+	while (rel < FasttabSnapshotTablesNumber)
+	{
+		FasttabDListFree(&fasttab_snapshot->relationData[rel].tuples);
+		rel++;
+	}
+
+	if (PointerIsValid(fasttab_snapshot->name))
+		pfree(fasttab_snapshot->name);
+
+	pfree(fasttab_snapshot);
+}
+
+/*
+ * Get current snapshot. Create one if necessary.
+ */
+static FasttabSnapshot
+FasttabSnapshotGetCurrent(void)
+{
+	if (PointerIsValid(CurrentFasttabSnapshotPrivate))
+		return CurrentFasttabSnapshotPrivate;
+
+	/* No snapshot yet: start with an empty one. */
+	CurrentFasttabSnapshotPrivate = FasttabSnapshotCreateEmpty();
+	return CurrentFasttabSnapshotPrivate;
+}
+
+/*
+ * Places a snapshot on top of snapshots stack. Placed snapshot becomes
+ * current.
+ */
+static inline void
+FasttabSnapshotPushBack(FasttabSnapshot fasttab_snapshot)
+{
+	FasttabSnapshot old_top = FasttabSnapshotGetCurrent();
+
+	fasttab_snapshot->prev = old_top;
+	CurrentFasttabSnapshotPrivate = fasttab_snapshot;
+}
+
+/*
+ * Removes snapshot from top of snapshots stack.
+ *
+ * Returns the removed snapshot (with its `prev` link cleared), or NULL if
+ * only the root snapshot is left, in which case nothing is removed.
+ */
+static FasttabSnapshot
+FasttabSnapshotPopBack(void)
+{
+	FasttabSnapshot top = FasttabSnapshotGetCurrent();
+	FasttabSnapshot popped = NULL;
+
+	if (!FasttabSnapshotIsRoot(top))
+	{
+		CurrentFasttabSnapshotPrivate = top->prev;
+		top->prev = NULL;
+		popped = top;
+	}
+
+	return popped;
+}
+
+/*
+ * Creates a copy of current snapshot with given name (can be NULL) and places
+ * it on top of snapshots stack. This copy becomes current snapshot.
+ */
+static void
+FasttabSnapshotCreate(const char *name)
+{
+	FasttabSnapshot copy;
+
+	copy = FasttabSnapshotCopy(FasttabSnapshotGetCurrent(), name);
+	FasttabSnapshotPushBack(copy);
+}
+
+/*
+ * Makes given snapshot a root one: it is linked below the current root
+ * snapshot, i.e. at the very bottom of the stack.
+ */
+static void
+FasttabSnapshotPushFront(FasttabSnapshot fasttab_snapshot)
+{
+	FasttabSnapshot bottom;
+
+	/* Walk down the stack to the current root. */
+	for (bottom = FasttabSnapshotGetCurrent();
+		 !FasttabSnapshotIsRoot(bottom);
+		 bottom = bottom->prev)
+		;
+
+	bottom->prev = fasttab_snapshot;
+	fasttab_snapshot->prev = NULL;
+}
+
+/*****************************************************************************
+							 MAIN PROCEDURES
+ *****************************************************************************/
+
+/*
+ * Make preparations related to virtual catalog on transaction begin.
+ *
+ * NB: There could be already a transaction in progress.
+ */
+void
+fasttab_begin_transaction(void)
+{
+#ifdef FASTTAB_DEBUG
+	elog(NOTICE, "FASTTAB: fasttab_begin_transaction, transaction is already in progress: %u",
+		 FasttabTransactionInProgress());
+#endif
+
+	if (FasttabTransactionInProgress())
+		return;
+
+	/* Begin transaction: work on an anonymous copy of the root snapshot. */
+	FasttabSnapshotCreate(NULL);
+	Assert(FasttabTransactionInProgress());
+	Assert(FasttabSnapshotIsAnonymous(FasttabSnapshotGetCurrent()));
+}
+
+/*
+ * Perform actions related to virtual catalog on transaction commit.
+ *
+ * The topmost snapshot (which holds all changes made so far) becomes the new
+ * root snapshot; every other snapshot, including the old root, is freed.
+ *
+ * NB: There could be actually no transaction in progress.
+ */
+void
+fasttab_end_transaction(void)
+{
+#ifdef FASTTAB_DEBUG
+	/*
+	 * NB: this message used to print a `result` variable that is not
+	 * declared anywhere in this function, so FASTTAB_DEBUG builds failed to
+	 * compile.
+	 */
+	elog(NOTICE, "FASTTAB: fasttab_end_transaction, transaction is in progress: %u",
+		 FasttabTransactionInProgress());
+#endif
+
+	if (!FasttabTransactionInProgress())
+		return;
+
+	Assert(FasttabSnapshotIsAnonymous(FasttabSnapshotGetCurrent()));
+
+	/* Commit transaction. 1) Save top snapshot to the bottom of the stack. */
+	FasttabSnapshotPushFront(FasttabSnapshotPopBack());
+	/* 2) get rid of all snapshots except the root one */
+	fasttab_abort_transaction();
+}
+
+/*
+ * Perform actions related to virtual catalog on transaction abort.
+ *
+ * Frees every snapshot except the root one, so the virtual catalog returns
+ * to the state stored in the root snapshot.
+ *
+ * NB: There could be in fact no transaction running.
+ */
+void
+fasttab_abort_transaction(void)
+{
+	FasttabSnapshot fasttab_snapshot;
+
+#ifdef FASTTAB_DEBUG
+	elog(NOTICE, "FASTTAB: fasttab_abort_transaction, transaction is in progress: %u (it's OK if this procedure is called from fasttab_end_transaction - see the code)",
+		 FasttabTransactionInProgress());
+#endif
+
+	if (!FasttabTransactionInProgress())
+		return;
+
+	/* FasttabSnapshotPopBack returns NULL once the root snapshot is reached */
+	while ((fasttab_snapshot = FasttabSnapshotPopBack()) != NULL)
+		FasttabSnapshotFree(fasttab_snapshot);
+
+	Assert(!FasttabTransactionInProgress());
+}
+
+/*
+ * Perform actions related to virtual catalog on savepoint creation.
+ *
+ * Two snapshots are pushed: a named copy that is kept untouched as the state
+ * to roll back to, and an anonymous copy on top of it that receives further
+ * changes.
+ */
+void
+fasttab_define_savepoint(const char *name)
+{
+	Assert(FasttabTransactionInProgress());
+	Assert(FasttabSnapshotIsAnonymous(FasttabSnapshotGetCurrent()));
+
+	/*
+	 * Value of `name` argument can be NULL in 'rollback to savepoint' case.
+	 * This case is already handled by fasttab_rollback_to_savepoint.
+	 */
+	if (PointerIsValid(name))
+	{
+#ifdef FASTTAB_DEBUG
+		elog(NOTICE, "FASTTAB: fasttab_define_safepoint, name = '%s'", name);
+#endif
+
+		FasttabSnapshotCreate(name);	/* savepoint to rollback to */
+		FasttabSnapshotCreate(NULL);	/* current snapshot to store changes */
+
+		Assert(FasttabTransactionInProgress());
+	}
+}
+
+/*
+ * Perform actions related to virtual catalog on `rollback to savepoint`.
+ *
+ * NB: There is no need to re-check case of savepoint name (upper / lower) or
+ * that savepoint exists.
+ */
+void
+fasttab_rollback_to_savepoint(const char *name)
+{
+	FasttabSnapshot top;
+
+	Assert(PointerIsValid(name));
+	Assert(FasttabTransactionInProgress());
+	Assert(FasttabSnapshotIsAnonymous(FasttabSnapshotGetCurrent()));
+
+#ifdef FASTTAB_DEBUG
+	elog(NOTICE, "FASTTAB: fasttab_rollback_to_savepoint, name = '%s'", name);
+#endif
+
+	/*
+	 * Discard snapshots from the top of the stack until the one named `name`
+	 * becomes current. The root snapshot must never be reached.
+	 */
+	top = FasttabSnapshotGetCurrent();
+	Assert(!FasttabSnapshotIsRoot(top));
+
+	while (FasttabSnapshotIsAnonymous(top) || strcmp(top->name, name) != 0)
+	{
+		FasttabSnapshotFree(FasttabSnapshotPopBack());
+
+		top = FasttabSnapshotGetCurrent();
+		Assert(!FasttabSnapshotIsRoot(top));
+	}
+
+	/* Create a new current snapshot to store changes. */
+	FasttabSnapshotCreate(NULL);
+}
+
+/*
+ * (Re)initialize part of `scan` related to virtual catalog during heap
+ * (re)scan.
+ *
+ * Positions the scan on the first virtual tuple of the scanned relation in
+ * the current snapshot, or on NULL when there are none. Does nothing for
+ * relations that cannot have virtual tuples.
+ */
+void
+fasttab_beginscan(HeapScanDesc scan)
+{
+	Oid			relid = RelationGetRelid(scan->rs_rd);
+	FasttabSnapshot snapshot;
+	dlist_head *tuples;
+
+	if (!IsFasttabHandledRelationId(relid))
+		return;
+
+	snapshot = FasttabSnapshotGetCurrent();
+	tuples = &snapshot->relationData[GetSnapshotRelationIdxByOid(relid)].tuples;
+
+	scan->rs_curr_inmem_tupnode =
+		dlist_is_empty(tuples) ? NULL : dlist_head_node(tuples);
+
+#ifdef FASTTAB_DEBUG
+	fasttab_scan_tuples_counter = 0;
+	elog(NOTICE, "FASTTAB: fasttab_beginscan, returning scan = %p, rs_curr_inmem_tupnode = %p", scan, scan->rs_curr_inmem_tupnode);
+#endif
+}
+
+/*
+ * Returns next virtual tuple during heap scan or NULL if there are no more
+ * virtual tuples. Basically heap_getnext implementation for virtual catalog.
+ *
+ * Only virtual tuples that pass the scan keys are returned. The returned
+ * tuple is the one stored in the snapshot, not a copy.
+ */
+HeapTuple
+fasttab_getnext(HeapScanDesc scan, ScanDirection direction)
+{
+	Oid			relid = RelationGetRelid(scan->rs_rd);
+	FasttabSnapshot snapshot;
+	dlist_head *tuples;
+	bool		match;
+
+	if (!IsFasttabHandledRelationId(relid))
+		return NULL;
+
+	/* Other directions are never used for pg_catalog. */
+	Assert(ScanDirectionIsForward(direction));
+
+	snapshot = FasttabSnapshotGetCurrent();
+	tuples = &snapshot->relationData[GetSnapshotRelationIdxByOid(relid)].tuples;
+
+	/*
+	 * Simple strategy - first return all in-memory tuples, then proceed with
+	 * others. A non-NULL position means the enumeration of in-memory tuples
+	 * is still in progress.
+	 */
+	while (scan->rs_curr_inmem_tupnode != NULL)
+	{
+		dlist_node *cur = scan->rs_curr_inmem_tupnode;
+		DListHeapTuple entry = (DListHeapTuple) cur;
+
+		/* Advance the scan position before examining the tuple. */
+		if (dlist_has_next(tuples, cur))
+			scan->rs_curr_inmem_tupnode = dlist_next_node(tuples, cur);
+		else
+			scan->rs_curr_inmem_tupnode = NULL;
+
+#ifdef FASTTAB_DEBUG
+		fasttab_scan_tuples_counter++;
+		elog(NOTICE, "FASTTAB: fasttab_getnext, scan = %p, counter = %u, direction = %d, return tuple t_self = %08X/%04X, oid = %d",
+			 scan, fasttab_scan_tuples_counter, direction,
+			 BlockIdGetBlockNumber(&entry->tup->t_self.ip_blkid), entry->tup->t_self.ip_posid, HeapTupleGetOid(entry->tup)
+			);
+#endif
+
+		/* HeapKeyTest is a macro, it changes `match` variable */
+		HeapKeyTest(entry->tup, RelationGetDescr(scan->rs_rd), scan->rs_nkeys, scan->rs_key, match);
+		if (match)
+			return entry->tup;
+	}
+
+	/* There are no more virtual tuples. */
+	return NULL;
+}
+
+/*
+ * Pretend searching HOT chain for virtual tuple.
+ *
+ * Basically heap_hot_search_buffer implementation for virtual catalog.
+ *
+ * Returns false if `tid` is not a virtual item pointer; the caller is then
+ * expected to do the regular search. Otherwise returns true and stores in
+ * `*result` whether a virtual tuple with this `tid` exists in the current
+ * snapshot. If it exists, its HeapTupleData header is copied to `heapTuple`.
+ */
+bool
+fasttab_hot_search_buffer(ItemPointer tid, Relation relation,
+						  HeapTuple heapTuple, bool *all_dead, bool *result)
+{
+	FasttabSnapshot snapshot;
+	dlist_head *tuples;
+	dlist_iter	iter;
+	bool		found = false;
+
+	if (!IsFasttabItemPointer(tid))
+		return false;
+
+	Assert(IsFasttabHandledRelationId(RelationGetRelid(relation)));
+
+	snapshot = FasttabSnapshotGetCurrent();
+	tuples = &snapshot->relationData[GetSnapshotRelationIdxByOid(RelationGetRelid(relation))].tuples;
+
+	dlist_foreach(iter, tuples)
+	{
+		DListHeapTuple entry = (DListHeapTuple) iter.cur;
+
+		if (!ItemPointerEquals(&entry->tup->t_self, tid))
+			continue;
+
+		memcpy(heapTuple, entry->tup, sizeof(HeapTupleData));
+		found = true;
+		break;
+	}
+
+#ifdef FASTTAB_DEBUG
+	elog(NOTICE, "FASTTAB: fasttab_hot_search_buffer, tid = %08X/%04X, found = %u",
+		 BlockIdGetBlockNumber(&tid->ip_blkid), tid->ip_posid, found);
+#endif
+
+	/* `all_dead` can be NULL during bitmap scan */
+	if (all_dead)
+		*all_dead = false;
+
+	/* `result` can be false in ALTER TABLE case */
+	*result = found;
+	return true;
+}
+
+/*
+ * Insert a tuple. Basically heap_insert implementation for virtual tuples.
+ * Returns true if tuple was inserted, false otherwise.
+ *
+ * On success a copy of `heaptup` (with a freshly generated virtual t_self)
+ * is appended to the current snapshot, `tup->t_self` is updated accordingly
+ * and `*result` receives the oid of `tup`. If `heaptup` is a different tuple
+ * than `tup`, it is freed.
+ */
+bool
+fasttab_insert(Relation relation, HeapTuple tup, HeapTuple heaptup, Oid *result)
+{
+	int			idx = GetSnapshotRelationIdxByOidInternal(RelationGetRelid(relation));
+	FasttabSnapshot snapshot;
+	FasttabSnapshotRelationData *rel_data;
+	DListHeapTuple entry;
+	MemoryContext savedctx;
+
+	if (idx < 0)				/* i.e. `!IsFasttabHandledRelationId` */
+		return false;
+
+	snapshot = FasttabSnapshotGetCurrent();
+
+	/*
+	 * Check whether tuple should be stored in-memory.
+	 *
+	 * NB: passing `idx` is kind of optimization, it could be actually
+	 * re-calculated from `relation` argument.
+	 */
+	if (!FasttabRelationMethodsTable[idx].is_inmem_tuple_fn(relation,
+													tup, snapshot, idx))
+		return false;
+
+	/* The stored copy must live in the virtual catalog memory context. */
+	savedctx = MemoryContextSwitchTo(GetLocalMemoryContext());
+	heaptup->t_self = GenFasttabItemPointerData();
+	entry = palloc(sizeof(DListHeapTupleData));
+	entry->tup = heap_copytuple(heaptup);
+	MemoryContextSwitchTo(savedctx);
+
+	rel_data = &snapshot->relationData[idx];
+	dlist_push_tail(&rel_data->tuples, &entry->node);
+	rel_data->tuples_num++;
+
+#ifdef FASTTAB_DEBUG
+	elog(NOTICE, "FASTTAB: fasttab_insert, dlist_tup->tup->t_self = %08X/%04X, oid = %d, inmemory tuples num = %d, heaptup oid = %d, idx = %d, relation relid = %d",
+		 BlockIdGetBlockNumber(&entry->tup->t_self.ip_blkid),
+		 entry->tup->t_self.ip_posid, HeapTupleGetOid(entry->tup),
+		 rel_data->tuples_num,
+		 HeapTupleGetOid(heaptup), idx, RelationGetRelid(relation)
+		);
+#endif
+
+	CacheInvalidateHeapTuple(relation, entry->tup, NULL);
+	pgstat_count_heap_insert(relation, 1);
+
+	/* Propagate the new t_self to the caller's tuple, drop the temp one. */
+	if (heaptup != tup)
+	{
+		tup->t_self = heaptup->t_self;
+		heap_freetuple(heaptup);
+	}
+
+	*result = HeapTupleGetOid(tup);
+	return true;
+}
+
+/*
+ * Remove pg_depend and pg_type records that would be kept in memory otherwise
+ * when relation with given Oid is deleted. Basically here we are solving the
+ * same issue that is solved by relpersistence hint, but during table deletion,
+ * not creation.
+ *
+ * Used in fasttab_delete.
+ *
+ * The cleanup follows a chain, starting with curroid = reloid: find a
+ * virtual pg_depend tuple whose refobjid equals curroid, take its objid as
+ * the new curroid, delete that pg_depend tuple, then delete the virtual
+ * pg_type tuple whose oid equals the new curroid. This is repeated until no
+ * matching pg_depend tuple is found.
+ */
+static void
+fasttab_clean_catalog_on_relation_delete(Oid reloid)
+{
+	Oid			curroid = reloid;
+	FasttabSnapshot fasttab_snapshot = FasttabSnapshotGetCurrent();
+	int			dependIdx = GetSnapshotRelationIdxByOid(DependRelationId);
+	int			typeIdx = GetSnapshotRelationIdxByOid(TypeRelationId);
+	Relation	dependRel = relation_open(DependRelationId, AccessShareLock);
+	Relation	typeRel = relation_open(TypeRelationId, AccessShareLock);
+	ItemPointerData itemPointerData;
+
+	for (;;)
+	{
+		dlist_iter	iter;
+		bool		isnull,
+					found = false;
+
+		/* Find pg_depend tuple with refobjid == curroid. */
+		dlist_foreach(iter, &fasttab_snapshot->relationData[dependIdx].tuples)
+		{
+			DListHeapTuple dlist_tup = (DListHeapTuple) iter.cur;
+			Oid			refobjid = DatumGetObjectId(heap_getattr(dlist_tup->tup, Anum_pg_depend_refobjid,
+									  RelationGetDescr(dependRel), &isnull));
+
+			if (refobjid == curroid)
+			{
+				found = true;
+				/* curroid := tuple.objid */
+				curroid = DatumGetObjectId(heap_getattr(dlist_tup->tup, Anum_pg_depend_objid,
+									  RelationGetDescr(dependRel), &isnull));
+
+				/*
+				 * Delete found tuple. Can't pass dlist_tup->tup->t_self as an
+				 * argument - this memory is about to be freed. The list is
+				 * modified by the delete, so iteration must stop right after.
+				 */
+				itemPointerData = dlist_tup->tup->t_self;
+				fasttab_delete(dependRel, &itemPointerData);
+				break;
+			}
+		}
+
+		/* If not found - cleanup is done, end of loop */
+		if (!found)
+			break;
+
+		/* Find pg_type tuple with oid == curroid */
+		found = false;
+		dlist_foreach(iter, &fasttab_snapshot->relationData[typeIdx].tuples)
+		{
+			DListHeapTuple dlist_tup = (DListHeapTuple) iter.cur;
+			Oid			oid = DatumGetObjectId(heap_getattr(dlist_tup->tup, ObjectIdAttributeNumber,
+									   RelationGetDescr(typeRel), &isnull));
+
+			if (oid == curroid)
+			{
+				found = true;
+
+				/*
+				 * Delete found tuple. Can't pass dlist_tup->tup->t_self as an
+				 * argument - this memory is about to be freed. The list is
+				 * modified by the delete, so iteration must stop right after.
+				 */
+				itemPointerData = dlist_tup->tup->t_self;
+				fasttab_delete(typeRel, &itemPointerData);
+				break;
+			}
+		}
+
+		Assert(found);
+	}
+
+	relation_close(typeRel, AccessShareLock);
+	relation_close(dependRel, AccessShareLock);
+}
+
+/*
+ * Delete tuple. Basically heap_delete implementation for virtual tuples.
+ * Returns true if tuple was deleted, false otherwise.
+ *
+ * False is returned only when `tid` is not a virtual item pointer. If `tid`
+ * is virtual but no such tuple exists in the current snapshot, an ERROR is
+ * raised.
+ */
+bool
+fasttab_delete(Relation relation, ItemPointer tid)
+{
+	FasttabSnapshot fasttab_snapshot;
+	dlist_iter	iter;
+	int			idx;
+
+	if (!IsFasttabItemPointer(tid))
+		return false;
+
+	Assert(IsFasttabHandledRelationId(RelationGetRelid(relation)));
+
+	fasttab_snapshot = FasttabSnapshotGetCurrent();
+	idx = GetSnapshotRelationIdxByOid(RelationGetRelid(relation));
+	dlist_foreach(iter, &fasttab_snapshot->relationData[idx].tuples)
+	{
+		DListHeapTuple dlist_tup = (DListHeapTuple) iter.cur;
+
+		if (ItemPointerEquals(&dlist_tup->tup->t_self, tid))
+		{
+			/*
+			 * If it's a tuple from pg_class, delete tuples that might still
+			 * refer to it. This is done before the tuple itself is unlinked
+			 * and freed.
+			 */
+			if (RelationGetRelid(relation) == RelationRelationId)
+			{
+				bool		isnull;
+				Oid			reloid = DatumGetObjectId(heap_getattr(dlist_tup->tup, ObjectIdAttributeNumber,
+									  RelationGetDescr(relation), &isnull));
+
+				fasttab_clean_catalog_on_relation_delete(reloid);
+			}
+
+			pgstat_count_heap_delete(relation);
+			CacheInvalidateHeapTuple(relation, dlist_tup->tup, NULL);
+
+			dlist_delete(&dlist_tup->node);
+			DListHeapTupleFree(dlist_tup);
+			fasttab_snapshot->relationData[idx].tuples_num--;
+
+#ifdef FASTTAB_DEBUG
+			elog(NOTICE, "FASTTAB: fasttab_delete, tid = %08X/%04X - entry found and deleted, tuples_num = %d, idx = %d, rd_id = %d",
+				 BlockIdGetBlockNumber(&tid->ip_blkid), tid->ip_posid,
+				 fasttab_snapshot->relationData[idx].tuples_num, idx, relation->rd_id
+				);
+#endif
+
+			return true;
+		}
+	}
+
+	elog(ERROR, "in-memory tuple not found during delete");
+	return false;				/* will be never reached */
+}
+
+/*
+ * Update tuple. 
Basically heap_update implementation for virtual tuples.
+ * Returns true if tuple was updated, false otherwise.
+ */
+bool
+fasttab_update(Relation relation, ItemPointer otid, HeapTuple newtup)
+{
+	FasttabSnapshot fasttab_snapshot;
+	dlist_iter	iter;
+	int			idx;
+
+	/* Not a virtual TID: report "not handled" without touching anything. */
+	if (!IsFasttabItemPointer(otid))
+		return false;
+
+	Assert(IsFasttabHandledRelationId(RelationGetRelid(relation)));
+
+#ifdef FASTTAB_DEBUG
+	elog(NOTICE, "FASTTAB: fasttab_update, looking for otid = %08X/%04X",
+		 BlockIdGetBlockNumber(&otid->ip_blkid), otid->ip_posid);
+#endif
+
+	/* Look up the stored tuple whose TID equals otid. */
+	fasttab_snapshot = FasttabSnapshotGetCurrent();
+	idx = GetSnapshotRelationIdxByOid(RelationGetRelid(relation));
+	dlist_foreach(iter, &fasttab_snapshot->relationData[idx].tuples)
+	{
+		DListHeapTuple dlist_tup = (DListHeapTuple) iter.cur;
+
+		if (ItemPointerEquals(&dlist_tup->tup->t_self, otid))
+		{
+			/* The replacement copy is allocated in the local memory context. */
+			MemoryContext oldctx = MemoryContextSwitchTo(GetLocalMemoryContext());
+
+			/* Invalidate while the old tuple is still valid; it is freed next. */
+			CacheInvalidateHeapTuple(relation, dlist_tup->tup, newtup);
+			heap_freetuple(dlist_tup->tup);
+
+			/*
+			 * The caller's newtup gets a freshly generated virtual TID, and
+			 * the stored copy carries the same TID.
+			 */
+			newtup->t_self = GenFasttabItemPointerData();
+			dlist_tup->tup = heap_copytuple(newtup);
+			MemoryContextSwitchTo(oldctx);
+
+			pgstat_count_heap_update(relation, false);
+
+#ifdef FASTTAB_DEBUG
+			elog(NOTICE, "FASTTAB: fasttab_update - entry found and updated, newtup->t_self = %08X/%04X, oid = %d, tuples_num = %d, idx = %d",
+				 BlockIdGetBlockNumber(&newtup->t_self.ip_blkid), newtup->t_self.ip_posid,
+				 HeapTupleGetOid(dlist_tup->tup),
+				 fasttab_snapshot->relationData[idx].tuples_num, idx);
+#endif
+			return true;
+		}
+	}
+
+	/* otid is virtual but no stored tuple has it */
+	elog(ERROR, "in-memory tuple not found during update");
+	return false;				/* never reached */
+}
+
+/*
+ * Update tuple "in place". Basically heap_inplace_update implementation for
+ * virtual tuples. Returns true if tuple was updated, false otherwise. 
+ */
+bool
+fasttab_inplace_update(Relation relation, HeapTuple tuple)
+{
+	FasttabSnapshot fasttab_snapshot;
+	dlist_iter	iter;
+	int			idx;
+
+	/* Not a virtual TID: report "not handled" without touching anything. */
+	if (!IsFasttabItemPointer(&tuple->t_self))
+		return false;
+
+	Assert(IsFasttabHandledRelationId(RelationGetRelid(relation)));
+
+#ifdef FASTTAB_DEBUG
+	elog(NOTICE, "FASTTAB: fasttab_heap_inplace_update, looking for tuple with tid = %08X/%04X, oid = %d...",
+	  BlockIdGetBlockNumber(&tuple->t_self.ip_blkid), tuple->t_self.ip_posid,
+		 HeapTupleGetOid(tuple));
+#endif
+
+	/* Look up the stored tuple whose TID equals tuple->t_self. */
+	fasttab_snapshot = FasttabSnapshotGetCurrent();
+	idx = GetSnapshotRelationIdxByOid(RelationGetRelid(relation));
+	dlist_foreach(iter, &fasttab_snapshot->relationData[idx].tuples)
+	{
+		DListHeapTuple dlist_tup = (DListHeapTuple) iter.cur;
+
+		if (ItemPointerEquals(&dlist_tup->tup->t_self, &tuple->t_self))
+		{
+			/* The replacement copy is allocated in the local memory context. */
+			MemoryContext oldctx = MemoryContextSwitchTo(GetLocalMemoryContext());
+
+			/* Cache invalidation is skipped in bootstrap mode. */
+			if (!IsBootstrapProcessingMode())
+				CacheInvalidateHeapTuple(relation, tuple, NULL);
+
+			/* Swap the stored tuple for a copy of `tuple`; the TID is kept. */
+			heap_freetuple(dlist_tup->tup);
+			dlist_tup->tup = heap_copytuple(tuple);
+			MemoryContextSwitchTo(oldctx);
+
+#ifdef FASTTAB_DEBUG
+			elog(NOTICE, "FASTTAB: fasttab_inplace_update - entry found and updated, tuples_num = %d, idx = %d",
+				 fasttab_snapshot->relationData[idx].tuples_num, idx);
+#endif
+			return true;
+		}
+	}
+
+	/* the TID is virtual but no stored tuple has it */
+	elog(ERROR, "in-memory tuple not found during \"in place\" update");
+	return false;				/* never reached */
+}
+
+/*
+ * Insert an index tuple into a relation. Basically index_insert implementation
+ * for virtual tuples. Returns true if tuple was inserted, false otherwise.
+ *
+ * Current FFTs implementation builds indexes "on the fly" when index scan
+ * begins. Thus for now we do almost nothing here. 
+ */
+bool
+fasttab_index_insert(Relation indexRelation, ItemPointer heap_t_ctid,
+					 bool *result)
+{
+	Oid			indexId = RelationGetRelid(indexRelation);
+
+	/* Not a virtual TID: report "not handled"; *result is left untouched. */
+	if (!IsFasttabItemPointer(heap_t_ctid))
+		return false;
+
+	Assert(IsFasttabHandledIndexId(indexId));
+
+#ifdef FASTTAB_DEBUG
+	elog(NOTICE, "FASTTAB: fasttab_index_insert, indexRelation relid = %u, heap_t_ctid = %08X/%04X",
+		 RelationGetRelid(indexRelation),
+		 BlockIdGetBlockNumber(&heap_t_ctid->ip_blkid),
+		 heap_t_ctid->ip_posid);
+#endif
+
+	/*
+	 * Same condition as the Assert above, checked at run time as well so
+	 * that builds without assertions still return false for an index that is
+	 * not handled.
+	 */
+	if (IsFasttabHandledIndexId(indexId))
+	{
+		*result = true;
+		return true;			/* don't actually modify an index */
+	}
+
+	return false;
+}
+
+/*
+ * Compare two tuples during index scan.
+ *
+ * Attributes are compared in the order listed in scan->indexMethods; the
+ * first attribute that differs decides the result. NULL attribute values are
+ * not expected (asserted).
+ *
+ * Returns:
+ * > 0 - first tuple is greater
+ * = 0 - tuples are equal
+ * < 0 - first tuple is lesser
+ */
+static int
+fasttab_index_compare_tuples(HeapTuple first, HeapTuple second,
+							 IndexScanDesc scan)
+{
+	TupleDesc	tupledesc = RelationGetDescr(scan->heapRelation);
+	Datum		datum1,
+				datum2;
+	bool		isnull1,
+				isnull2;
+	int			i,
+				result = 0;
+
+	for (i = 0; i < scan->indexMethods->nattr; i++)
+	{
+		Assert(scan->indexMethods->attrCompareMethod[i] != CompareInvalid);
+		datum1 = heap_getattr(first, scan->indexMethods->attrNumbers[i], tupledesc,
+							  &isnull1);
+		datum2 = heap_getattr(second, scan->indexMethods->attrNumbers[i], tupledesc,
+							  &isnull2);
+		Assert((!isnull1) && (!isnull2));
+
+		/* Pick the comparison that matches this attribute's declared method. */
+		switch (scan->indexMethods->attrCompareMethod[i])
+		{
+			case CompareOid:
+				result = FasttabCompareInts(DatumGetObjectId(datum1),
+											DatumGetObjectId(datum2));
+				break;
+			case CompareCString:
+				result = strcmp(DatumGetCString(datum1),
+								DatumGetCString(datum2));
+				break;
+			case CompareInt16:
+				result = FasttabCompareInts(DatumGetInt16(datum1),
+											DatumGetInt16(datum2));
+				break;
+			case CompareInt64:
+				result = FasttabCompareInts(DatumGetInt64(datum1),
+											DatumGetInt64(datum2));
+				break;
+			case CompareBoolean:
+				result = FasttabCompareInts(DatumGetBool(datum1),
+											DatumGetBool(datum2));
+				break;
+			default:			/* should never happen, can be useful during
+								 * development though */
+				elog(ERROR, "Unexpected compare method: %d",
+					 scan->indexMethods->attrCompareMethod[i]);
+		}
+
+		/* first differing attribute decides */
+		if (result != 0)
+			break;
+	}
+
+	return result;
+}
+
+/*
+ * Form index tuple from virtual heap tuple during index-only scan.
+ *
+ * Reads the indexed attributes of `tup` (as listed in scan->indexMethods)
+ * and passes them to index_form_tuple with the index relation's descriptor.
+ */
+static IndexTuple
+fasttab_index_form_tuple(HeapTuple tup, IndexScanDesc scan)
+{
+	TupleDesc	heaptupledesc = RelationGetDescr(scan->heapRelation);
+	TupleDesc	indextupledesc = RelationGetDescr(scan->indexRelation);
+	Datum		values[FasttabIndexMaxAttributes];
+	bool		isnull[FasttabIndexMaxAttributes];
+	int			i;
+
+	for (i = 0; i < scan->indexMethods->nattr; i++)
+	{
+		/*
+		 * NB: heap_getattr processes negative attribute numbers like
+		 * ObjectIdAttributeNumber just fine
+		 */
+		values[i] = heap_getattr(tup, scan->indexMethods->attrNumbers[i],
+								 heaptupledesc, &(isnull[i]));
+	}
+
+	return index_form_tuple(indextupledesc, values, isnull);
+}
+
+/*
+ * Convert index attribute number to heap attribute number.
+ *
+ * `indexAttno` is 1-based; it is used as an index into
+ * scan->indexMethods->attrNumbers after subtracting one.
+ */
+static inline AttrNumber
+fasttab_convert_index_attno_to_heap_attno(IndexScanDesc scan,
+										  AttrNumber indexAttno)
+{
+	Assert(indexAttno > 0);
+	Assert(indexAttno <= FasttabIndexMaxAttributes);
+	Assert(indexAttno <= scan->indexMethods->nattr);
+	return scan->indexMethods->attrNumbers[indexAttno - 1];
+}
+
+/*
+ * Determine whether virtual heap tuple matches WHERE condition during index
+ * scan.
+ *
+ * scan->keyData is temporarily rewritten to use heap attribute numbers while
+ * the test runs and is restored before returning.
+ */
+static bool
+fasttab_index_tuple_matches_where_condition(IndexScanDesc scan, HeapTuple tup)
+{
+	int			i;
+	bool		insert;
+	AttrNumber	attrNumbersBackup[FasttabIndexMaxAttributes];
+
+	/* If WHERE condition is empty all tuples match */
+	if (scan->numberOfKeys == 0)
+		return true;
+
+	/* NB: scan->keyData[0].sk_strategy can be InvalidStrategy */
+	Assert(scan->keyData != NULL);
+	Assert(scan->keyData[0].sk_attno != InvalidAttrNumber);
+
+	/* Convert index attribute numbers to tuple attribute numbers. */
+	for (i = 0; i < scan->numberOfKeys; i++)
+	{
+		attrNumbersBackup[i] = scan->keyData[i].sk_attno;
+		scan->keyData[i].sk_attno = fasttab_convert_index_attno_to_heap_attno(scan, scan->keyData[i].sk_attno);
+	}
+
+	/* NB: HeapKeyTest is a macro, it changes `insert` variable */
+	HeapKeyTest(tup, RelationGetDescr(scan->heapRelation), scan->numberOfKeys,
+				scan->keyData, insert);
+
+	/* Restore original attribute numbers. */
+	for (i = 0; i < scan->numberOfKeys; i++)
+		scan->keyData[i].sk_attno = attrNumbersBackup[i];
+
+	return insert;
+}
+
+/*
+ * Add tuple to scan->xs_inmem_tuplist at proper position.
+ *
+ * The list is kept in ascending order as defined by
+ * fasttab_index_compare_tuples. A copy of `tup` is stored, not `tup` itself.
+ *
+ * Returns:
+ * true - tuple added
+ * false - tuple not added (filtered by WHERE condition)
+ */
+static bool
+fasttab_index_insert_tuple_in_sorted_list(IndexScanDesc scan, HeapTuple tup)
+{
+	DListHeapTuple dlist_tup;
+	/* inserting after the list head puts the node first in the list */
+	dlist_node *insert_after = &scan->xs_inmem_tuplist.head;
+	dlist_iter	iter;
+
+	/* scan->orderByData is never used in index scans over catalog tables */
+	Assert(scan->numberOfOrderBys == 0);
+	Assert(scan->numberOfKeys >= 0 && scan->numberOfKeys <= FasttabIndexMaxAttributes);
+
+	if (!fasttab_index_tuple_matches_where_condition(scan, tup))
+		return false;
+
+	/* Using regular transaction memory context here. */
+	dlist_tup = palloc(sizeof(DListHeapTupleData));
+	dlist_tup->tup = heap_copytuple(tup);
+
+	/* Stop at the first stored tuple that is not less than the new one. */
+	dlist_foreach(iter, &scan->xs_inmem_tuplist)
+	{
+		DListHeapTuple dlist_curr = (DListHeapTuple) iter.cur;
+
+		if (fasttab_index_compare_tuples(dlist_curr->tup, tup, scan) >= 0)
+			break;
+
+		insert_after = iter.cur;
+	}
+
+	dlist_insert_after(insert_after, &dlist_tup->node);
+
+#ifdef FASTTAB_DEBUG
+	elog(NOTICE, "FASTTAB: fasttab_index_insert_tuple_in_sorted_list scan = %p, tup oid = %d, tuple added to list",
+		 scan, HeapTupleGetOid(tup));
+#endif
+
+	return true;
+}
+
+/*
+ * Initialize part of `scan` related to virtual catalog. Basically
+ * index_beginscan implementation for virtual tuples. 
+ *
+ * NB: scan->keyData is not initialized here (usually filled with 0x7f's)
+ */
+void
+fasttab_index_beginscan(IndexScanDesc scan)
+{
+	Oid			indexId;
+
+	/* Check the pointer before it is dereferenced to read the index oid. */
+	Assert(PointerIsValid(scan->indexRelation));
+
+	indexId = RelationGetRelid(scan->indexRelation);
+
+	/* Indexes outside the virtual catalog need no extra scan state. */
+	if (!IsFasttabHandledIndexId(indexId))
+		return;
+
+	scan->xs_regular_tuple_enqueued = false;
+	scan->xs_regular_scan_finished = false;
+	scan->xs_scan_finish_returned = false;
+
+	/* indexMethods is accessed quite often so we memoize it */
+	scan->indexMethods = GetFasttabIndexMethods(indexId);
+
+	/*
+	 * xs_inmem_tuplist is initialized when fasttab_index_getnext_tid_merge is
+	 * called first time. We are not doing it here because:
+	 *
+	 * 1) It's more efficient this way, since sometimes beginscan/rescan are
+	 * called without any actual scanning
+	 *
+	 * 2) Sometimes `scan` passed to beginscan is not fully initialized so we
+	 * can't filter tuples by WHERE condition here
+	 *
+	 * 3) We would like to filter tuples by WHERE condition ASAP, otherwise
+	 * memory will be wasted on tuples that will be filtered anyway
+	 */
+	scan->xs_inmem_tuplist_init_done = false;
+	dlist_init(&scan->xs_inmem_tuplist);
+
+	/*
+	 * Make sure scan->xs_ctup.t_self has proper initial value (required in
+	 * index_getnext_tid)
+	 */
+	ItemPointerSetInvalid(&scan->xs_ctup.t_self);
+
+#ifdef FASTTAB_DEBUG
+	elog(NOTICE, "FASTTAB: fasttab_index_beginscan (could be called from rescan), scan = %p, indexId = %u "
+		 "scan->numberOfKeys = %d, scan->keyData = %p, scan->numberOfOrderBys = %d, scan->orderByData = %p",
+	 scan, indexId, scan->numberOfKeys, scan->keyData, scan->numberOfOrderBys,
+		 scan->orderByData
+		);
+#endif
+
+}
+
+/*
+ * Free part of `scan` related to virtual catalog. Basically index_endscan
+ * implementation for virtual tuples. 
+ */
+void
+fasttab_index_endscan(IndexScanDesc scan)
+{
+	Assert(PointerIsValid(scan->indexRelation));
+
+	/* Indexes outside the virtual catalog have no extra scan state. */
+	if (!IsFasttabHandledIndexId(RelationGetRelid(scan->indexRelation)))
+		return;
+
+	/* Free in-memory tuples left. */
+	FasttabDListFree(&scan->xs_inmem_tuplist);
+
+#ifdef FASTTAB_DEBUG
+	elog(NOTICE, "FASTTAB: fasttab_index_endscan (could be called from rescan), scan = %p, scan->indexRelation relid = %u",
+		 scan, RelationGetRelid(scan->indexRelation)
+		);
+#endif
+
+}
+
+/*
+ * Reinitialize part of `scan` related to virtual catalog. Basically
+ * index_rescan implementation for virtual tuples.
+ *
+ * Implemented as endscan followed by beginscan; the keys/orderbys arguments
+ * are not used here.
+ *
+ * NB: scan->keyData is not initialized here (usually filled with 0x7f's)
+ */
+void
+fasttab_index_rescan(IndexScanDesc scan, ScanKey keys, int nkeys,
+					 ScanKey orderbys, int norderbys)
+{
+	fasttab_index_endscan(scan);
+	fasttab_index_beginscan(scan);
+}
+
+/*
+ * Fetch virtual or regular tuple from heap. Almost as heap_fetch, but also
+ * understands HOT chains.
+ *
+ * The TID to fetch is taken from tuple->t_self. For a regular TID the page is
+ * read, share-locked for the lookup, then unlocked and released before
+ * returning.
+ *
+ * Returns true if tuple was found, false otherwise.
+ */
+bool
+fasttab_simple_heap_fetch(Relation relation, Snapshot snapshot,
+						  HeapTuple tuple)
+{
+	Page		page;
+	bool		found;
+	Buffer		buffer = InvalidBuffer;
+	ItemPointer tid = &(tuple->t_self);
+
+	/*
+	 * No need to lock any buffers for in-memory tuple, they could not even
+	 * exist!
+	 */
+	if (IsFasttabItemPointer(tid))
+		return heap_hot_search_buffer(tid, relation, buffer, snapshot, tuple, NULL, true);
+
+	/* Fetch and pin the appropriate page of the relation. */
+	buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
+
+	/* Need share lock on buffer to examine tuple commit status. */
+	LockBuffer(buffer, BUFFER_LOCK_SHARE);
+	page = BufferGetPage(buffer);
+	TestForOldSnapshot(snapshot, relation, page);
+
+	found = heap_hot_search_buffer(tid, relation, buffer, snapshot, tuple,
+								   NULL, true);
+
+	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+	ReleaseBuffer(buffer);
+
+	return found;
+}
+
+/*
+ * Make sure scan->xs_inmem_tuplist is initialized.
+ *
+ * On the first call for a scan, every in-memory tuple of the scanned heap
+ * relation is offered to fasttab_index_insert_tuple_in_sorted_list, which
+ * keeps only those matching the scan keys. Later calls do nothing.
+ */
+static void
+fasttab_index_make_sure_inmem_tuplist_init_done(IndexScanDesc scan)
+{
+	FasttabSnapshot fasttab_snapshot;
+	dlist_iter	iter;
+	int			idx;
+
+	Assert(PointerIsValid(scan->indexRelation));
+
+	/* initialize scan->xs_inmem_tuplist during first call */
+	if (scan->xs_inmem_tuplist_init_done)
+		return;
+
+	idx = GetSnapshotRelationIdxByOid(RelationGetRelid(scan->heapRelation));
+
+	fasttab_snapshot = FasttabSnapshotGetCurrent();
+	dlist_foreach(iter, &fasttab_snapshot->relationData[idx].tuples)
+	{
+		DListHeapTuple dlist_curr = (DListHeapTuple) iter.cur;
+
+		/* result ignored: tuples filtered out by the scan keys are skipped */
+		(void) fasttab_index_insert_tuple_in_sorted_list(scan, dlist_curr->tup);
+	}
+
+	scan->xs_inmem_tuplist_init_done = true;
+}
+
+/*
+ * Get next virtual or regular TID from a scan. Basically a wrapper around
+ * indexRelation->rd_amroutine->amgettuple procedure.
+ *
+ * For indexes handled by the virtual catalog, in-memory tuples and regular
+ * tuples are merged through scan->xs_inmem_tuplist: the list holds the
+ * matching in-memory tuples in sorted order plus at most one regular tuple
+ * at a time, and each call pops the head of the list. Once the list is
+ * empty the call is forwarded to the AM's amgettuple.
+ *
+ * Returns true if a TID was stored in scan->xs_ctup.t_self, false when the
+ * scan is over.
+ *
+ * NB: we filter tuples using scan->keyData _here_ since keyData is not always
+ * initialized when fasttab_index_beginscan or _rescan are called (usually
+ * filled with 0x7f's)
+ *
+ * NOTE(review): SCAN_CHECKS and GET_SCAN_PROCEDURE are not defined in the
+ * visible part of this file - confirm that an included header provides them.
+ */
+bool
+fasttab_index_getnext_tid_merge(IndexScanDesc scan, ScanDirection direction)
+{
+	bool		found;			/* did amgettuple return another TID? */
+	bool		fetched;		/* was the heap tuple for that TID found? */
+	DListHeapTuple ret_node;
+	FmgrInfo   *procedure;
+
+	SCAN_CHECKS;
+	GET_SCAN_PROCEDURE(amgettuple);
+
+	Assert(PointerIsValid(scan->indexRelation));
+
+	if (!IsFasttabHandledIndexId(RelationGetRelid(scan->indexRelation)))
+	{
+		/*
+		 * Regular logic.
+		 *
+		 * The AM's amgettuple proc finds the next index entry matching the scan
+		 * keys, and puts the TID into scan->xs_ctup.t_self.  It should also set
+		 * scan->xs_recheck and possibly scan->xs_itup, though we pay no attention
+		 * to those fields here.
+		 */
+		return DatumGetBool(FunctionCall2(procedure,
+										  PointerGetDatum(scan),
+										  Int32GetDatum(direction)));
+	}
+
+	/* Initialize scan->xs_inmem_tuplist during first call. */
+	fasttab_index_make_sure_inmem_tuplist_init_done(scan);
+
+	if (dlist_is_empty(&scan->xs_inmem_tuplist))		/* in-memory tuples
+														 * enumeration is over? */
+	{
+#ifdef FASTTAB_DEBUG
+		elog(NOTICE, "FASTTAB: fasttab_index_getnext_tid_merge, scan = %p, fake tuples list is empty, xs_regular_scan_finished = %u, xs_scan_finish_returned = %u",
+			 scan, scan->xs_regular_scan_finished, scan->xs_scan_finish_returned);
+#endif
+
+		/*
+		 * If ->amgettuple() already returned false we should not call it once
+		 * again.  In this case btree index will start a scan all over again,
+		 * see btgettuple implementation.  Still if user will call this
+		 * procedure once again despite of returned 'false' value she probably
+		 * knows what she is doing.
+		 */
+		if (scan->xs_regular_scan_finished && (!scan->xs_scan_finish_returned))
+		{
+			scan->xs_scan_finish_returned = true;
+			return false;
+		}
+
+		/* regular logic */
+		return DatumGetBool(FunctionCall2(procedure,
+										  PointerGetDatum(scan),
+										  Int32GetDatum(direction)));
+	}
+
+	/*
+	 * Other directions are not used in index-only scans for catalog tables.
+	 * No need to check direction above this point since only here
+	 * scan->xs_inmem_tuplist is both initialized and non-empty.
+	 */
+	Assert(ScanDirectionIsForward(direction));
+
+	/* If there is no regular tuple in in-memory queue, we should load one. */
+	while ((!scan->xs_regular_tuple_enqueued) && (!scan->xs_regular_scan_finished))
+	{
+		found = DatumGetBool(FunctionCall2(procedure,
+										   PointerGetDatum(scan),
+										   Int32GetDatum(direction)));
+		if (found)
+		{
+			HeapTupleData regular_tup;
+
+			/* fasttab_simple_heap_fetch reads the TID from t_self */
+			regular_tup.t_self = scan->xs_ctup.t_self;
+			fetched = fasttab_simple_heap_fetch(scan->heapRelation, scan->xs_snapshot,
+												&regular_tup);
+
+			if (!fetched)
+			{
+#ifdef FASTTAB_DEBUG
+				elog(NOTICE, "FASTTAB: fasttab_index_getnext_tid_merge, scan = %p, indexed tuple not found, 'continue;'",
+					 scan);
+#endif
+				continue;
+			}
+
+			/*
+			 * NOTE(review): fasttab_simple_heap_fetch has already unlocked
+			 * and released the buffer by the time regular_tup is copied into
+			 * the list - confirm the tuple data is still safe to read here.
+			 */
+			scan->xs_regular_tuple_enqueued = fasttab_index_insert_tuple_in_sorted_list(scan, &regular_tup);
+		}
+		else
+			scan->xs_regular_scan_finished = true;
+	}
+
+	Assert(scan->xs_regular_scan_finished || scan->xs_regular_tuple_enqueued);
+
+	/* The head of the sorted list is the next tuple to return. */
+	ret_node = (DListHeapTuple) dlist_pop_head_node(&scan->xs_inmem_tuplist);
+	Assert(PointerIsValid(ret_node));
+
+	scan->xs_recheck = false;
+	ItemPointerCopy(&ret_node->tup->t_self, &scan->xs_ctup.t_self);
+
+	/* A regular tuple was just consumed, so the next call must load another. */
+	if (!IsFasttabItemPointer(&scan->xs_ctup.t_self))
+		scan->xs_regular_tuple_enqueued = false;
+
+#ifdef FASTTAB_DEBUG
+	elog(NOTICE, "FASTTAB: fasttab_index_getnext_tid_merge, scan = %p, direction = %d, scan->indexRelation relid = %u, return tuple tid = %08X/%04X",
+		 scan, direction, RelationGetRelid(scan->indexRelation),
+		 BlockIdGetBlockNumber(&scan->xs_ctup.t_self.ip_blkid),
+		 scan->xs_ctup.t_self.ip_posid
+		);
+#endif
+
+	/* Build the index tuple before the list node and its tuple are freed. */
+	scan->xs_itup = fasttab_index_form_tuple(ret_node->tup, scan);
+	DListHeapTupleFree(ret_node);
+	return true;
+}
+
+/*
+ * Get all tuples, virtual and regular, at once from an index scan. Basically
+ * index_getbitmap implementation for virtual tuples.
+ *
+ * Returns true on success and false if relation doesn't have a virtual part. 
+ */
+bool
+fasttab_index_getbitmap(IndexScanDesc scan, TIDBitmap *bitmap, int64 *result)
+{
+	int64		ntids = 0;
+	bool		heap_opened = false;	/* did we open heapRelation ourselves? */
+
+	Assert(PointerIsValid(scan->indexRelation));
+
+	/* Not a virtual catalog index: *result is left untouched. */
+	if (!IsFasttabHandledIndexId(RelationGetRelid(scan->indexRelation)))
+		return false;
+
+	/* Fill heapRelation if it's NULL, we require it in fasttab_* procedures */
+	if (!scan->heapRelation)
+	{
+		scan->heapRelation = heap_open(scan->indexRelation->rd_index->indrelid,
+									   NoLock);
+		heap_opened = true;
+	}
+
+	/* Initialize scan->xs_inmem_tuplist during first call. */
+	fasttab_index_make_sure_inmem_tuplist_init_done(scan);
+
+	/* There are in fact no in-memory tuples? */
+	if (dlist_is_empty(&scan->xs_inmem_tuplist))
+	{
+		if (heap_opened)		/* cleanup */
+		{
+			heap_close(scan->heapRelation, NoLock);
+			scan->heapRelation = NULL;
+		}
+		/* *result is left untouched on this path as well */
+		return false;
+	}
+
+	/* Add every TID produced by the merged scan to the bitmap, one by one. */
+	while (fasttab_index_getnext_tid_merge(scan, ForwardScanDirection))
+	{
+		tbm_add_tuples(bitmap, &scan->xs_ctup.t_self, 1, false);
+		ntids++;
+	}
+
+	if (heap_opened)			/* cleanup */
+	{
+		heap_close(scan->heapRelation, NoLock);
+		scan->heapRelation = NULL;
+	}
+
+	/* number of TIDs added to the bitmap */
+	*result = ntids;
+	return true;
+}
+
+
+/*****************************************************************************
+		  PROCEDURES USED IN FasttabRelationMethodsTable
+ *****************************************************************************/
+
+/*
+ * Determine wheter given tuple of pg_class relation should be stored in-memory.
+ *
+ * If tuple's relpersistence = RELPERSISTENCE_FAST_TEMP it should be virtual. 
+ */
+static bool
+pg_class_is_inmem_tuple(Relation relation, HeapTuple tup,
+						FasttabSnapshot fasttab_snapshot, int tableIdx)
+{
+	bool		isnull;
+	Datum		relpersistencedat;
+	TupleDesc	tupledesc;
+
+	/* fasttab_snapshot and tableIdx are not used by this procedure */
+	Assert(RelationGetRelid(relation) == RelationRelationId);
+
+	tupledesc = RelationGetDescr(relation);
+	relpersistencedat = heap_getattr(tup, Anum_pg_class_relpersistence,
+									 tupledesc, &isnull);
+	Assert(!isnull);
+
+	/* Use the Datum accessor, like the other DatumGet* calls in this file. */
+	return (DatumGetChar(relpersistencedat) == RELPERSISTENCE_FAST_TEMP);
+}
+
+/*
+ * Determine whether given tuple of relations other than pg_class should be
+ * stored in-memory.
+ *
+ * If tuple references to virtual pg_class tuple it should be virtual as well.
+ *
+ * For pg_type and pg_depend the decision is taken from
+ * CurrentRelpersistenceHint alone. For other relations the oid attributes
+ * listed in FasttabRelationMethodsTable[tableIdx] are read from `tup` and
+ * compared against the oids of all in-memory pg_class tuples.
+ */
+static bool
+generic_is_inmem_tuple(Relation relation, HeapTuple tup,
+					   FasttabSnapshot fasttab_snapshot, int tableIdx)
+{
+	dlist_iter	iter;
+	TupleDesc	tupledesc;
+	Oid			values[FasttabRelationMaxOidAttributes];
+	bool		isnull;
+	int			i,
+				pg_class_idx,
+				noidattr;
+
+	Assert(IsFasttabHandledRelationId(RelationGetRelid(relation)));
+
+	/* Validate tableIdx before it is used to index the methods table. */
+	Assert(tableIdx >= 0 && tableIdx < FasttabSnapshotTablesNumber);
+	noidattr = FasttabRelationMethodsTable[tableIdx].noidattr;
+	Assert(noidattr > 0 && noidattr <= FasttabRelationMaxOidAttributes);
+
+	/*
+	 * Special case. During table creation pg_type and pg_depend are modified
+	 * before pg_class (see heap_create_with_catalog implementation) so there
+	 * is no way to tell whether tuples are in-memory without using
+	 * relpersistence hint. Also this check could be considered as an
+	 * optimization.
+	 */
+	if ((RelationGetRelid(relation) == TypeRelationId) || (RelationGetRelid(relation) == DependRelationId))
+		return (CurrentRelpersistenceHint == RELPERSISTENCE_FAST_TEMP);
+
+	tupledesc = RelationGetDescr(relation);
+
+	/* Collect the oid attributes of `tup` that may reference pg_class. */
+	for (i = 0; i < noidattr; i++)
+	{
+		values[i] = DatumGetObjectId(heap_getattr(tup,
+						FasttabRelationMethodsTable[tableIdx].attrNumbers[i],
+												  tupledesc, &isnull));
+		Assert(!isnull);
+	}
+
+	/*
+	 * Check whether there is an in-memory pg_class tuple with oid from
+	 * values[] array
+	 */
+	pg_class_idx = GetSnapshotRelationIdxByOid(RelationRelationId);
+	dlist_foreach(iter, &fasttab_snapshot->relationData[pg_class_idx].tuples)
+	{
+		DListHeapTuple dlist_tup = (DListHeapTuple) iter.cur;
+		Oid			oid = HeapTupleGetOid(dlist_tup->tup);
+
+		for (i = 0; i < noidattr; i++)
+		{
+			if (oid == values[i])
+				return true;
+		}
+	}
+
+	return false;
+}
+
diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c
index 19fac199e79..9de813bdf79 100644
--- a/src/backend/access/gist/gistutil.c
+++ b/src/backend/access/gist/gistutil.c
@@ -807,7 +807,8 @@ gistGetFakeLSN(Relation rel)
 {
 	static XLogRecPtr counter = 1;
 
-	if (rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP)
+	if (rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP ||
+		rel->rd_rel->relpersistence == RELPERSISTENCE_FAST_TEMP)
 	{
 		/*
 		 * Temporary relations are only accessible in our session, so a simple
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 9dc53738231..f61d429622e 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -51,6 +51,7 @@
 #include "access/visibilitymap.h"
 #include "access/xact.h"
 #include "access/xlogutils.h"
+#include "access/fasttab.h"
 #include "catalog/catalog.h"
 #include "catalog/namespace.h"
 #include "miscadmin.h"
@@ -70,6 +71,8 @@
 #include "utils/snapmgr.h"
 #include "utils/syscache.h"
 #include "utils/tqual.h"
+#include "utils/memutils.h"
+#include "lib/ilist.h"
 #include 
"catalog/oid_dispatch.h" #include "cdb/cdbvars.h" @@ -1674,6 +1677,9 @@ heap_beginscan_internal(Relation relation, Snapshot snapshot, initscan(scan, key, false); + /* Initialize part of `scan` related to virtual catalog. */ + fasttab_beginscan(scan); + return scan; } @@ -1695,6 +1701,9 @@ heap_afterscan(HeapScanDesc scan) ReleaseBuffer(scan->rs_cbuf); scan->rs_cbuf = InvalidBuffer; } + + /* Reinitialize part of `scan` related to virtual catalog. */ + fasttab_beginscan(scan); } /* ---------------- @@ -1711,6 +1720,9 @@ heap_rescan(HeapScanDesc scan, * reinitialize scan descriptor */ initscan(scan, key, true); + + /* Reinitialize part of `scan` related to virtual catalog. */ + fasttab_beginscan(scan); } /* ---------------- @@ -1775,6 +1787,12 @@ heap_endscan(HeapScanDesc scan) HeapTuple heap_getnext(HeapScanDesc scan, ScanDirection direction) { + HeapTuple fasttab_result; + + /* First return all virtual tuples, then regular ones. */ + fasttab_result = fasttab_getnext(scan, direction); + if (HeapTupleIsValid(fasttab_result)) + return fasttab_result; /* Note: no locking manipulations needed */ HEAPDEBUG_1; /* heap_getnext( info ) */ @@ -1855,6 +1873,8 @@ heap_fetch(Relation relation, OffsetNumber offnum; bool valid; + Assert(!IsFasttabHandledRelationId(relation->rd_id)); + /* * Fetch and pin the appropriate page of the relation. */ @@ -1978,13 +1998,24 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, Snapshot snapshot, HeapTuple heapTuple, bool *all_dead, bool first_call) { - Page dp = (Page) BufferGetPage(buffer); + Page dp; TransactionId prev_xmax = InvalidTransactionId; BlockNumber blkno; OffsetNumber offnum; bool at_chain_start; bool valid; bool skip; + bool fasttab_result; + + /* Return matching virtual tuple if there is one. 
*/ + if (fasttab_hot_search_buffer(tid, relation, heapTuple, all_dead, &fasttab_result)) + return fasttab_result; + + /* + * `buffer` can be InvalidBuffer for in-memory tuples, so we should call + * BufferGetPage only after we verified it's not a case. + */ + dp = (Page) BufferGetPage(buffer); /* If this is not the first call, previous call returned a (live!) tuple */ if (all_dead) @@ -2149,6 +2180,8 @@ heap_get_latest_tid(Relation relation, ItemPointerData ctid; TransactionId priorXmax; + Assert(!IsFasttabHandledRelationId(relation->rd_id)); + /* this is to avoid Assert failures on bad input */ if (!ItemPointerIsValid(tid)) return; @@ -2381,6 +2414,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, FaultInjector_InjectFaultIfSet("heap_insert", DDLNotSpecified, "", RelationGetRelationName(relation)); #endif + Oid fasttab_result; /* * Fill in tuple header fields, assign an OID, and toast the tuple if @@ -2391,6 +2425,10 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, */ heaptup = heap_prepare_insert(relation, tup, xid, cid, options, isFrozen); + /* If it's a virtual tuple it should be inserted in virtual catalog. */ + if (fasttab_insert(relation, tup, heaptup, &fasttab_result)) + return fasttab_result; + /* * Find buffer to insert this tuple into. If the page is all visible, * this will also pin the requisite visibility map page. 
@@ -2690,6 +2728,7 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples, /* currently not needed (thus unsupported) for heap_multi_insert() */ AssertArg(!(options & HEAP_INSERT_NO_LOGICAL)); + Assert(!IsFasttabHandledRelationId(relation->rd_id)); needwal = !(options & HEAP_INSERT_SKIP_WAL) && RelationNeedsWAL(relation); saveFreeSpace = RelationGetTargetPageFreeSpace(relation, @@ -3111,6 +3150,10 @@ heap_delete(Relation relation, ItemPointer tid, Assert(RelationIsHeap(relation)); gp_expand_protect_catalog_changes(relation); + /* If it's a virtual tuple, it should be deleted from virtual catalog. */ + if (fasttab_delete(relation, tid)) + return HeapTupleMayBeUpdated; + block = ItemPointerGetBlockNumber(tid); buffer = ReadBuffer(relation, block); @@ -3580,6 +3623,10 @@ heap_update_internal(Relation relation, ItemPointer otid, HeapTuple newtup, gp_expand_protect_catalog_changes(relation); + /* If it's a virtual tuple it should be updated in virtual catalog. */ + if (fasttab_update(relation, otid, newtup)) + return HeapTupleMayBeUpdated; + /* * Fetch the list of attributes to be checked for HOT update. 
This is * wasted effort if we fail to update or have to put the new tuple on a @@ -4690,6 +4737,8 @@ heap_lock_tuple(Relation relation, HeapTuple tuple, new_infomask2; bool have_tuple_lock = false; + Assert(!IsFasttabHandledRelationId(relation->rd_id)); + *buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid)); LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); @@ -5261,6 +5310,8 @@ static void heap_acquire_tuplock(Relation relation, ItemPointer tid, LockTupleMode mode, bool nowait, bool *have_tuple_lock) { + Assert(!IsFasttabHandledRelationId(relation->rd_id)); + if (*have_tuple_lock) return; @@ -5961,6 +6012,8 @@ static HTSU_Result heap_lock_updated_tuple(Relation rel, HeapTuple tuple, ItemPointer ctid, TransactionId xid, LockTupleMode mode) { + Assert(!IsFasttabHandledRelationId(rel->rd_id)); + /* * If the tuple has not been updated, or has moved into another partition * (effectively a delete) stop here. @@ -6013,6 +6066,10 @@ heap_inplace_update(Relation relation, HeapTuple tuple) uint32 oldlen; uint32 newlen; + /* If it's a virtual tuple it should be updated in virtual catalog. 
*/ + if (fasttab_inplace_update(relation, tuple)) + return; + buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(&(tuple->t_self))); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); page = (Page) BufferGetPage(buffer); @@ -6435,6 +6492,8 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, bool freeze_xmax = false; TransactionId xid; + Assert(!IsFasttabItemPointer(&tuple->t_ctid)); + frz->frzflags = 0; frz->t_infomask2 = tuple->t_infomask2; frz->t_infomask = tuple->t_infomask; @@ -6624,6 +6683,8 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, void heap_execute_freeze_tuple(HeapTupleHeader tuple, xl_heap_freeze_tuple *frz) { + Assert(!IsFasttabItemPointer(&tuple->t_ctid)); + HeapTupleHeaderSetXmax(tuple, frz->xmax); if (frz->frzflags & XLH_FREEZE_XVAC) @@ -7031,6 +7092,8 @@ heap_tuple_needs_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid, { TransactionId xid; + Assert(!IsFasttabItemPointer(&tuple->t_ctid)); + xid = HeapTupleHeaderGetXmin(tuple); if (TransactionIdIsNormal(xid) && TransactionIdPrecedes(xid, cutoff_xid)) diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c index 8b2d86b986c..86364afd33d 100644 --- a/src/backend/access/index/indexam.c +++ b/src/backend/access/index/indexam.c @@ -71,6 +71,8 @@ #include "access/xlog.h" #include "access/bitmap.h" +#include "access/fasttab.h" +#include "catalog/catalog.h" #include "catalog/index.h" #include "catalog/catalog.h" #include "pgstat.h" @@ -213,10 +215,15 @@ index_insert(Relation indexRelation, IndexUniqueCheck checkUnique) { FmgrInfo *procedure; + bool result; RELATION_CHECKS; GET_REL_PROCEDURE(aminsert); + /* If it's a virtual ItemPointer, process it accordingly. 
*/ + if (fasttab_index_insert(indexRelation, heap_t_ctid, &result)) + return result; + if (!(indexRelation->rd_am->ampredlocks)) CheckForSerializableConflictIn(indexRelation, (HeapTuple) NULL, @@ -313,6 +320,8 @@ index_beginscan_internal(Relation indexRelation, Int32GetDatum(nkeys), Int32GetDatum(norderbys))); + /* Initialize part of `scan` related to virtual catalog */ + fasttab_index_beginscan(scan); return scan; } @@ -358,6 +367,9 @@ index_rescan(IndexScanDesc scan, Int32GetDatum(nkeys), PointerGetDatum(orderbys), Int32GetDatum(norderbys)); + + /* Reinitialize part of `scan` related to virtual catalog. */ + fasttab_index_rescan(scan, keys, nkeys, orderbys, norderbys); } /* ---------------- @@ -372,6 +384,9 @@ index_endscan(IndexScanDesc scan) SCAN_CHECKS; GET_SCAN_PROCEDURE(amendscan); + /* Free part of `scan` related to virtual catalog. */ + fasttab_index_endscan(scan); + /* Release any held pin on a heap page */ if (BufferIsValid(scan->xs_cbuf)) { @@ -426,6 +441,7 @@ index_restrpos(IndexScanDesc scan) FmgrInfo *procedure; Assert(IsMVCCSnapshot(scan->xs_snapshot)); + Assert(!IsFasttabHandledIndexId(scan->indexRelation->rd_id)); SCAN_CHECKS; GET_SCAN_PROCEDURE(amrestrpos); @@ -447,23 +463,12 @@ index_restrpos(IndexScanDesc scan) ItemPointer index_getnext_tid(IndexScanDesc scan, ScanDirection direction) { - FmgrInfo *procedure; bool found; - SCAN_CHECKS; - GET_SCAN_PROCEDURE(amgettuple); - Assert(TransactionIdIsValid(RecentGlobalXmin)); - /* - * The AM's amgettuple proc finds the next index entry matching the scan - * keys, and puts the TID into scan->xs_ctup.t_self. It should also set - * scan->xs_recheck and possibly scan->xs_itup, though we pay no attention - * to those fields here. 
- */ - found = DatumGetBool(FunctionCall2(procedure, - PointerGetDatum(scan), - Int32GetDatum(direction))); + /* Get the next regular or virtual TID */ + found = fasttab_index_getnext_tid_merge(scan, direction); /* Reset kill flag immediately for safety */ scan->kill_prior_tuple = false; @@ -511,6 +516,17 @@ index_fetch_heap(IndexScanDesc scan) bool all_dead = false; bool got_heap_tuple; + /* Is it a virtual TID? */ + if (IsFasttabItemPointer(tid)) + { + bool fasttab_result; + + /* Just get virtual tuple by TID */ + got_heap_tuple = fasttab_hot_search_buffer(tid, scan->heapRelation, &scan->xs_ctup, &all_dead, &fasttab_result); + Assert(got_heap_tuple && fasttab_result); + return &scan->xs_ctup; + } + /* We can skip the buffer-switching logic if we're in mid-HOT chain. */ if (!scan->xs_continue_hot) { @@ -672,19 +688,24 @@ index_getbitmap(IndexScanDesc scan, Node **bitmapP) /* just make sure this is false... */ scan->kill_prior_tuple = false; - /* - * have the am's getbitmap proc do all the work. - */ - d = FunctionCall2(procedure, - PointerGetDatum(scan), - PointerGetDatum(bitmapP)); + /* Try to use virtual catalog procedure first */ + if (!fasttab_index_getbitmap(scan, bitmap, &ntids)) + { + /* if it failed - have the am's getbitmap proc do all the work. */ + /* + * have the am's getbitmap proc do all the work. 
+ */ + d = FunctionCall2(procedure, + PointerGetDatum(scan), + PointerGetDatum(bitmapP)); - ntids = DatumGetInt64(d); + ntids = DatumGetInt64(d); - /* If int8 is pass-by-ref, must free the result to avoid memory leak */ + /* If int8 is pass-by-ref, must free the result to avoid memory leak */ #ifndef USE_FLOAT8_BYVAL - pfree(DatumGetPointer(d)); + pfree(DatumGetPointer(d)); #endif + } pgstat_count_index_tuples(scan->indexRelation, ntids); diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c index 4f8196696cf..e1ac9b7b4bc 100644 --- a/src/backend/access/nbtree/nbtinsert.c +++ b/src/backend/access/nbtree/nbtinsert.c @@ -20,6 +20,8 @@ #include "access/transam.h" #include "cdb/cdbappendonlyam.h" #include "cdb/cdbaocsam.h" +#include "access/xloginsert.h" +#include "access/fasttab.h" #include "miscadmin.h" #include "storage/lmgr.h" #include "storage/predicate.h" @@ -390,6 +392,18 @@ _bt_check_unique(Relation rel, IndexTuple itup, Relation heapRel, * If we are doing a recheck, we expect to find the tuple we * are rechecking. It's not a duplicate, but we have to keep * scanning. + * If it is an in-memory tuple there is certainly no transaction + * to wait for. + */ + if (IsFasttabItemPointer(&htid)) + return InvalidTransactionId; + + /* + * It is a duplicate. If we are only doing a partial + * check, then don't bother checking if the tuple is being + * updated in another transaction. Just return the fact + * that it is a potential conflict and leave the full + * check till later. 
*/ if (checkUnique == UNIQUE_CHECK_EXISTING && ItemPointerCompare(&htid, &itup->t_tid) == 0) diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index 618ce3f536a..d963e3f76d0 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -28,6 +28,7 @@ #include "access/xact.h" #include "access/xact_storage_tablespace.h" #include "access/xlogutils.h" +#include "access/fasttab.h" #include "catalog/catalog.h" #include "catalog/namespace.h" #include "catalog/oid_dispatch.h" @@ -2591,6 +2592,9 @@ StartTransaction(void) initialize_wal_bytes_written(); ShowTransactionState("StartTransaction"); + /* Make preparations related to virtual catalog */ + fasttab_begin_transaction(); + elogif(Debug_print_full_dtm, LOG, "StartTransaction in DTX Context = '%s', " "isolation level %s, read-only = %d, %s", @@ -2896,6 +2900,9 @@ CommitTransaction(void) s->state = TRANS_DEFAULT; /* we're now in a consistent state to handle an interrupt. */ + /* Perform actions related to virtual catalog. */ + fasttab_end_transaction(); + RESUME_INTERRUPTS(); /* Release resource group slot at the end of a transaction */ @@ -3385,6 +3392,9 @@ AbortTransaction(void) pgstat_report_xact_timestamp(0); } + /* Perform actions related to virtual catalog. */ + fasttab_abort_transaction(); + /* * Exported snapshots must be cleared before transaction ID is reset. In * GPDB, transaction ID is reset below. In PostgreSQL, because 2PC is not @@ -4779,6 +4789,9 @@ DefineSavepoint(char *name) BlockStateAsString(s->blockState)); break; } + + /* Perform actions related to virtual catalog. */ + fasttab_define_savepoint(name); } /* @@ -5008,6 +5021,9 @@ RollbackToSavepoint(List *options) else elog(FATAL, "RollbackToSavepoint: unexpected state %s", BlockStateAsString(xact->blockState)); + + /* Perform actions related to virtual catalog. 
*/ + fasttab_rollback_to_savepoint(name); } static void diff --git a/src/backend/catalog/catalog.c b/src/backend/catalog/catalog.c index bc9c692421e..89c4156c304 100644 --- a/src/backend/catalog/catalog.c +++ b/src/backend/catalog/catalog.c @@ -661,6 +661,7 @@ GetNewRelFileNode(Oid reltablespace, Relation pg_class, char relpersistence) switch (relpersistence) { case RELPERSISTENCE_TEMP: + case RELPERSISTENCE_FAST_TEMP: backend = TempRelBackendId; break; case RELPERSISTENCE_UNLOGGED: diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c index 11e193485c9..b9fd6da544c 100644 --- a/src/backend/catalog/dependency.c +++ b/src/backend/catalog/dependency.c @@ -16,6 +16,7 @@ #include "access/htup_details.h" #include "access/xact.h" +#include "access/fasttab.h" #include "catalog/dependency.h" #include "catalog/heap.h" #include "catalog/index.h" @@ -591,6 +592,13 @@ findDependentObjects(const ObjectAddress *object, { Form_pg_depend foundDep = (Form_pg_depend) GETSTRUCT(tup); + /* + * Ignore in-memory tuples here. They are properly handled by virtual + * catalog logic already. + */ + if (IsFasttabItemPointer(&tup->t_self)) + continue; + otherObject.classId = foundDep->refclassid; otherObject.objectId = foundDep->refobjid; otherObject.objectSubId = foundDep->refobjsubid; @@ -774,6 +782,13 @@ findDependentObjects(const ObjectAddress *object, Form_pg_depend foundDep = (Form_pg_depend) GETSTRUCT(tup); int subflags; + /* + * Ignore in-memory tuples here. They are properly handled by virtual + * catalog logic already. 
+ */ + if (IsFasttabItemPointer(&tup->t_self)) + continue; + otherObject.classId = foundDep->classid; otherObject.objectId = foundDep->objid; otherObject.objectSubId = foundDep->objsubid; diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index 537eaa56b7f..113a23bec59 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -39,6 +39,8 @@ #include "access/reloptions.h" #include "access/xact.h" #include "catalog/aocatalog.h" +#include "access/xlog.h" +#include "access/fasttab.h" #include "catalog/binary_upgrade.h" #include "catalog/catalog.h" #include "catalog/dependency.h" @@ -1355,7 +1357,7 @@ AddNewRelationType(const char *typeName, * tupdesc: tuple descriptor (source of column definitions) * cooked_constraints: list of precooked check constraints and defaults * relkind: relkind for new rel - * relpersistence: rel's persistence status (permanent, temp, or unlogged) + * relpersistence: rel's persistence status (permanent, temp, fast temp or unlogged) * shared_relation: TRUE if it's to be a shared relation * mapped_relation: TRUE if the relation will use the relfilenode map * oidislocal: TRUE if oid column (if any) should be marked attislocal @@ -1614,69 +1616,97 @@ heap_create_with_catalog(const char *relname, heap_close(pg_type, AccessShareLock); } - /* - * Since defining a relation also defines a complex type, we add a new - * system type corresponding to the new relation. The OID of the type can - * be preselected by the caller, but if reltypeid is InvalidOid, we'll - * generate a new OID for it. - * - * NOTE: we could get a unique-index failure here, in case someone else is - * creating the same type name in parallel but hadn't committed yet when - * we checked for a duplicate name above. 
- */ - new_type_addr = AddNewRelationType(relname, - relnamespace, - relid, - relkind, - ownerid, - reltypeid, - new_array_oid); - new_type_oid = new_type_addr.objectId; - if (typaddress) - *typaddress = new_type_addr; - - /* - * Now make the array type if wanted. - */ - if (OidIsValid(new_array_oid)) + PG_TRY(); { if (!relarrayname) relarrayname = makeArrayTypeName(relname, relnamespace); + /* Set a relpersistence hint. See procedure description. */ + fasttab_set_relpersistence_hint(relpersistence); - TypeCreate(new_array_oid, /* force the type's OID to this */ - relarrayname, /* Array type name */ - relnamespace, /* Same namespace as parent */ - InvalidOid, /* Not composite, no relationOid */ - 0, /* relkind, also N/A here */ - ownerid, /* owner's ID */ - -1, /* Internal size (varlena) */ - TYPTYPE_BASE, /* Not composite - typelem is */ - TYPCATEGORY_ARRAY, /* type-category (array) */ - false, /* array types are never preferred */ - DEFAULT_TYPDELIM, /* default array delimiter */ - F_ARRAY_IN, /* array input proc */ - F_ARRAY_OUT, /* array output proc */ - F_ARRAY_RECV, /* array recv (bin) proc */ - F_ARRAY_SEND, /* array send (bin) proc */ - InvalidOid, /* typmodin procedure - none */ - InvalidOid, /* typmodout procedure - none */ - F_ARRAY_TYPANALYZE, /* array analyze procedure */ - new_type_oid, /* array element type - the rowtype */ - true, /* yes, this is an array type */ - InvalidOid, /* this has no array type */ - InvalidOid, /* domain base type - irrelevant */ - NULL, /* default value - none */ - NULL, /* default binary representation */ - false, /* passed by reference */ - 'd', /* alignment - must be the largest! */ - 'x', /* fully TOASTable */ - -1, /* typmod */ - 0, /* array dimensions for typBaseType */ - false, /* Type NOT NULL */ - InvalidOid); /* rowtypes never have a collation */ + /* + * Since defining a relation also defines a complex type, we add a new + * system type corresponding to the new relation. 
The OID of the type + * can be preselected by the caller, but if reltypeid is InvalidOid, + * we'll generate a new OID for it. + * + * NOTE: we could get a unique-index failure here, in case someone + * else is creating the same type name in parallel but hadn't + * committed yet when we checked for a duplicate name above. + */ + new_type_addr = AddNewRelationType(relname, + relnamespace, + relid, + relkind, + ownerid, + reltypeid, + new_array_oid); + + /* Clear relpersistence hint. */ + fasttab_clear_relpersistence_hint(); + + new_type_oid = new_type_addr.objectId; + if (typaddress) + *typaddress = new_type_addr; + + /* + * Now make the array type if wanted. + */ + if (OidIsValid(new_array_oid)) + { + char *relarrayname; - pfree(relarrayname); + relarrayname = makeArrayTypeName(relname, relnamespace); + + /* Set a relpersistence hint. See procedure description. */ + fasttab_set_relpersistence_hint(relpersistence); + + TypeCreate(new_array_oid, /* force the type's OID to this */ + relarrayname, /* Array type name */ + relnamespace, /* Same namespace as parent */ + InvalidOid, /* Not composite, no relationOid */ + 0, /* relkind, also N/A here */ + ownerid, /* owner's ID */ + -1, /* Internal size (varlena) */ + TYPTYPE_BASE, /* Not composite - typelem is */ + TYPCATEGORY_ARRAY, /* type-category (array) */ + false, /* array types are never preferred */ + DEFAULT_TYPDELIM, /* default array delimiter */ + F_ARRAY_IN, /* array input proc */ + F_ARRAY_OUT, /* array output proc */ + F_ARRAY_RECV, /* array recv (bin) proc */ + F_ARRAY_SEND, /* array send (bin) proc */ + InvalidOid, /* typmodin procedure - none */ + InvalidOid, /* typmodout procedure - none */ + F_ARRAY_TYPANALYZE, /* array analyze procedure */ + new_type_oid, /* array element type - the rowtype */ + true, /* yes, this is an array type */ + InvalidOid, /* this has no array type */ + InvalidOid, /* domain base type - irrelevant */ + NULL, /* default value - none */ + NULL, /* default binary representation */ + 
false, /* passed by reference */ + 'd', /* alignment - must be the largest! */ + 'x', /* fully TOASTable */ + -1, /* typmod */ + 0, /* array dimensions for typBaseType */ + false, /* Type NOT NULL */ + InvalidOid); /* rowtypes never have a collation */ + + /* Clear relpersistence hint. */ + fasttab_clear_relpersistence_hint(); + + pfree(relarrayname); + } + + } + PG_CATCH(); + { + /* clear relpersistence hint in case of error */ + fasttab_clear_relpersistence_hint(); + PG_RE_THROW(); } + PG_END_TRY(); + /* * now create an entry in pg_class for the relation. @@ -1762,7 +1792,7 @@ heap_create_with_catalog(const char *relname, recordDependencyOnNewAcl(RelationRelationId, relid, 0, ownerid, relacl); - if (relpersistence != RELPERSISTENCE_TEMP) + if (relpersistence != RELPERSISTENCE_TEMP && relpersistence != RELPERSISTENCE_FAST_TEMP) recordDependencyOnCurrentExtension(&myself, false); if (reloftypeid) diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 0d15e2410c1..dc0e82c90bb 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -31,6 +31,7 @@ #include "access/transam.h" #include "access/visibilitymap.h" #include "access/xact.h" +#include "access/fasttab.h" #include "bootstrap/bootstrap.h" #include "catalog/aoblkdir.h" #include "catalog/binary_upgrade.h" @@ -2685,6 +2686,14 @@ IndexBuildHeapScan(Relation heapRelation, { bool tupleIsAlive; + /* + * Ignore in-memory tuples here. These tuples are in fact not indexed. + * They are mixed in during index scans right from the virtual heap + * instead. 
+ */ + if (IsFasttabItemPointer(&heapTuple->t_self)) + continue; + CHECK_FOR_INTERRUPTS(); /* diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c index 142a79a88af..e45d0e23b28 100644 --- a/src/backend/catalog/namespace.c +++ b/src/backend/catalog/namespace.c @@ -295,7 +295,7 @@ RangeVarGetRelidExtended(const RangeVar *relation, LOCKMODE lockmode, * operation, which must be careful to find the temp table, even when * pg_temp is not first in the search path. */ - if (relation->relpersistence == RELPERSISTENCE_TEMP) + if (relation->relpersistence == RELPERSISTENCE_TEMP || relation->relpersistence == RELPERSISTENCE_FAST_TEMP) { if (!OidIsValid(myTempNamespace)) relId = InvalidOid; /* this probably can't happen? */ @@ -474,7 +474,7 @@ RangeVarGetCreationNamespace(const RangeVar *newRelation) namespaceId = get_namespace_oid(newRelation->schemaname, false); /* we do not check for USAGE rights here! */ } - else if (newRelation->relpersistence == RELPERSISTENCE_TEMP) + else if (newRelation->relpersistence == RELPERSISTENCE_TEMP || newRelation->relpersistence == RELPERSISTENCE_FAST_TEMP) { /* Initialize temp namespace if first time through */ if (!OidIsValid(myTempNamespace)) @@ -642,6 +642,7 @@ RangeVarAdjustRelationPersistence(RangeVar *newRelation, Oid nspid) switch (newRelation->relpersistence) { case RELPERSISTENCE_TEMP: + case RELPERSISTENCE_FAST_TEMP: if (!isTempOrToastNamespace(nspid)) { if (isAnyTempNamespace(nspid)) diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c index 7d792884208..632c768080e 100644 --- a/src/backend/catalog/storage.c +++ b/src/backend/catalog/storage.c @@ -87,6 +87,7 @@ RelationCreateStorage(RelFileNode rnode, char relpersistence, char relstorage) switch (relpersistence) { case RELPERSISTENCE_TEMP: + case RELPERSISTENCE_FAST_TEMP: backend = TempRelBackendId; needs_wal = false; break; diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c index 4e69c0333f9..2c6583fbc8e 
100644 --- a/src/backend/commands/cluster.c +++ b/src/backend/commands/cluster.c @@ -28,6 +28,9 @@ #include "catalog/aoseg.h" #include "catalog/aoblkdir.h" #include "catalog/aovisimap.h" +#include "access/xlog.h" +#include "access/fasttab.h" +#include "catalog/pg_am.h" #include "catalog/catalog.h" #include "catalog/dependency.h" #include "catalog/heap.h" @@ -751,9 +754,12 @@ make_new_heap(Oid OIDOldHeap, Oid NewTableSpace, bool forcetemp, if (forcetemp) { - namespaceid = LookupCreationNamespace("pg_temp"); relpersistence = RELPERSISTENCE_TEMP; } + + if (relpersistence == RELPERSISTENCE_TEMP || relpersistence == RELPERSISTENCE_FAST_TEMP) + namespaceid = LookupCreationNamespace("pg_temp"); + } else { namespaceid = RelationGetNamespace(OldHeap); @@ -1099,6 +1105,10 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, if (tuple == NULL) break; + /* No need to move in-memory tuple anywhere */ + if (IsFasttabItemPointer(&tuple->t_self)) + continue; + /* Since we used no scan keys, should never need to recheck */ if (indexScan->xs_recheck) elog(ERROR, "CLUSTER does not support lossy index conditions"); @@ -1111,6 +1121,10 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, if (tuple == NULL) break; + /* No need to move in-memory tuple anywhere */ + if (IsFasttabItemPointer(&tuple->t_self)) + continue; + buf = heapScan->rs_cbuf; } diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index 0fbf063afdd..30465350eb7 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -2687,7 +2687,7 @@ ReindexDatabase(ReindexStmt *stmt) continue; /* Skip temp tables of other backends; we can't reindex them at all */ - if (classtuple->relpersistence == RELPERSISTENCE_TEMP && + if ((classtuple->relpersistence == RELPERSISTENCE_TEMP || classtuple->relpersistence == RELPERSISTENCE_FAST_TEMP) && !isTempNamespace(classtuple->relnamespace)) continue; diff --git 
a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 89291b379af..5b507402336 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -31,6 +31,8 @@ #include "access/tupconvert.h" #include "access/xact.h" #include "catalog/aocatalog.h" +#include "access/xlog.h" +#include "access/fasttab.h" #include "catalog/catalog.h" #include "catalog/dependency.h" #include "catalog/heap.h" @@ -614,7 +616,8 @@ DefineRelation(CreateStmt *stmt, * Check consistency of arguments */ if (stmt->oncommit != ONCOMMIT_NOOP - && stmt->relation->relpersistence != RELPERSISTENCE_TEMP) + && stmt->relation->relpersistence != RELPERSISTENCE_TEMP + && stmt->relation->relpersistence != RELPERSISTENCE_FAST_TEMP) ereport(ERROR, (errcode(ERRCODE_INVALID_TABLE_DEFINITION), errmsg("ON COMMIT can only be used on temporary tables"))); @@ -637,7 +640,8 @@ DefineRelation(CreateStmt *stmt, * code. This is needed because calling code might not expect untrusted * tables to appear in pg_temp at the front of its search path. 
*/ - if (stmt->relation->relpersistence == RELPERSISTENCE_TEMP + if ((stmt->relation->relpersistence == RELPERSISTENCE_TEMP || + stmt->relation->relpersistence == RELPERSISTENCE_FAST_TEMP) && InSecurityRestrictedOperation()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), @@ -2251,7 +2255,9 @@ MergeAttributes(List *schema, List *supers, char relpersistence, bool isPartitio parent->relname))); /* Permanent rels cannot inherit from temporary ones */ if (relpersistence != RELPERSISTENCE_TEMP && - relation->rd_rel->relpersistence == RELPERSISTENCE_TEMP) + relpersistence != RELPERSISTENCE_FAST_TEMP && + (relation->rd_rel->relpersistence == RELPERSISTENCE_TEMP || + relation->rd_rel->relpersistence == RELPERSISTENCE_FAST_TEMP)) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("cannot inherit from temporary relation \"%s\"", @@ -2265,7 +2271,8 @@ MergeAttributes(List *schema, List *supers, char relpersistence, bool isPartitio parent->relname))); /* If existing rel is temp, it must belong to this session */ - if (relation->rd_rel->relpersistence == RELPERSISTENCE_TEMP && + if ((relation->rd_rel->relpersistence == RELPERSISTENCE_TEMP || + relation->rd_rel->relpersistence == RELPERSISTENCE_FAST_TEMP) && !relation->rd_islocaltemp) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), @@ -9525,7 +9532,9 @@ ATAddForeignKeyConstraint(AlteredTableInfo *tab, Relation rel, errmsg("constraints on unlogged tables may reference only permanent or unlogged tables"))); break; case RELPERSISTENCE_TEMP: - if (pkrel->rd_rel->relpersistence != RELPERSISTENCE_TEMP) + case RELPERSISTENCE_FAST_TEMP: + if (pkrel->rd_rel->relpersistence != RELPERSISTENCE_TEMP && + pkrel->rd_rel->relpersistence != RELPERSISTENCE_FAST_TEMP) ereport(ERROR, (errcode(ERRCODE_INVALID_TABLE_DEFINITION), errmsg("constraints on temporary tables may reference only temporary tables"))); @@ -13949,12 +13958,30 @@ ATExecAddInherit(Relation child_rel, Node *node, LOCKMODE lockmode) 
ATSimplePermissions(parent_rel, ATT_TABLE); /* Permanent rels cannot inherit from temporary ones */ - if (parent_rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP && - child_rel->rd_rel->relpersistence != RELPERSISTENCE_TEMP) + if ((parent_rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP || + parent_rel->rd_rel->relpersistence == RELPERSISTENCE_FAST_TEMP) && + (child_rel->rd_rel->relpersistence != RELPERSISTENCE_TEMP && + child_rel->rd_rel->relpersistence != RELPERSISTENCE_FAST_TEMP)) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("cannot inherit from temporary relation \"%s\"", RelationGetRelationName(parent_rel)))); + + /* If parent rel is temp, it must belong to this session */ + if ((parent_rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP || + parent_rel->rd_rel->relpersistence == RELPERSISTENCE_FAST_TEMP) && + !parent_rel->rd_islocaltemp) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot inherit from temporary relation of another session"))); + + /* Ditto for the child */ + if ((child_rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP || + child_rel->rd_rel->relpersistence == RELPERSISTENCE_FAST_TEMP) && + !child_rel->rd_islocaltemp) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot inherit to temporary relation of another session"))); if (is_partition) { diff --git a/src/backend/commands/tablespace.c b/src/backend/commands/tablespace.c index fcc561da823..592a0611861 100644 --- a/src/backend/commands/tablespace.c +++ b/src/backend/commands/tablespace.c @@ -1510,7 +1510,7 @@ GetDefaultTablespace(char relpersistence) Oid result; /* The temp-table case is handled elsewhere */ - if (relpersistence == RELPERSISTENCE_TEMP) + if (relpersistence == RELPERSISTENCE_TEMP || relpersistence == RELPERSISTENCE_FAST_TEMP) { PrepareTempTablespaces(); return GetNextTempTableSpace(); diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c index c8da318be76..aa8c62c77cf 
100755 --- a/src/backend/executor/nodeBitmapHeapscan.c +++ b/src/backend/executor/nodeBitmapHeapscan.c @@ -43,6 +43,7 @@ #include "access/relscan.h" #include "access/transam.h" +#include "access/fasttab.h" #include "executor/execdebug.h" #include "executor/nodeBitmapHeapscan.h" #include "pgstat.h" @@ -379,6 +380,34 @@ BitmapHeapNext(BitmapHeapScanState *node) node->lossy_pages++; CheckSendPlanStateGpmonPkt(&node->ss.ps); + if(tbmres->blockno < scan->rs_nblocks) + { + /* + * Normal case. Fetch the current heap page and identify + * candidate tuples. + */ + bitgetpage(scan, tbmres); + } + else + { + /* + * We are probably looking for an in-memory tuple. This code + * executes in cases when CurrentFasttabBlockId is larger than + * normal block ids. + */ + OffsetNumber i; + + /* + * Check all tuples on a virtual page. + * + * NB: 0 is an invalid offset. + */ + for(i = 1; i <= MaxHeapTuplesPerPage; i++) + scan->rs_vistuples[i-1] = FASTTAB_ITEM_POINTER_BIT | i; + + scan->rs_ntuples = MaxHeapTuplesPerPage; + tbmres->recheck = true; + } /* * Set rs_cindex to first slot to examine @@ -474,6 +503,25 @@ BitmapHeapNext(BitmapHeapScanState *node) #endif ItemPointerSet(&scan->rs_ctup.t_self, tbmres->blockno, targoffset); + /* Is it a virtual TID? */ + if (IsFasttabItemPointer(&scan->rs_ctup.t_self)) + { + /* Fetch tuple from virtual catalog (if tuple still exists). */ + if(!fasttab_simple_heap_fetch(scan->rs_rd, scan->rs_snapshot, &scan->rs_ctup)) + continue; + } + else + { + /* Regular logic. 
*/ + dp = (Page) BufferGetPage(scan->rs_cbuf); + lp = PageGetItemId(dp, targoffset); + Assert(ItemIdIsNormal(lp)); + + scan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp); + scan->rs_ctup.t_len = ItemIdGetLength(lp); + scan->rs_ctup.t_tableOid = scan->rs_rd->rd_id; + } + pgstat_count_heap_fetch(scan->rs_rd); /* diff --git a/src/backend/nodes/tidbitmap.c b/src/backend/nodes/tidbitmap.c index d363bd4be2f..2fec679fe05 100644 --- a/src/backend/nodes/tidbitmap.c +++ b/src/backend/nodes/tidbitmap.c @@ -35,10 +35,38 @@ #include "access/htup_details.h" #include "access/bitmap.h" /* XXX: remove once pull_stream is generic */ #include "executor/instrument.h" /* Instrumentation */ +#include "access/fasttab.h" #include "nodes/bitmapset.h" #include "nodes/tidbitmap.h" #include "utils/hsearch.h" +/* + * The maximum number of tuples per page is not large (typically 256 with + * 8K pages, or 1024 with 32K pages). Also in-memory tuples have large fake + * offsets because of FASTTAB_ITEM_POINTER_BIT. So there's not much point in + * making the per-page bitmaps variable size. We just legislate that the size + * is this: + */ +#define MAX_TUPLES_PER_PAGE (FASTTAB_ITEM_POINTER_BIT | MaxHeapTuplesPerPage) + +/* + * When we have to switch over to lossy storage, we use a data structure + * with one bit per page, where all pages having the same number DIV + * PAGES_PER_CHUNK are aggregated into one chunk. When a chunk is present + * and has the bit set for a given page, there must not be a per-page entry + * for that page in the page table. + * + * We actually store both exact pages and lossy chunks in the same hash + * table, using identical data structures. (This is because dynahash.c's + * memory management doesn't allow space to be transferred easily from one + * hashtable to another.) Therefore it's best if PAGES_PER_CHUNK is the + * same as MAX_TUPLES_PER_PAGE, or at least not too different. 
But we + * also want PAGES_PER_CHUNK to be a power of 2 to avoid expensive integer + * remainder operations. So, define it like this: + */ +#define PAGES_PER_CHUNK (BLCKSZ / 32) + +/* We use BITS_PER_BITMAPWORD and typedef bitmapword from nodes/bitmapset.h */ #define WORDNUM(x) ((x) / TBM_BITS_PER_BITMAPWORD) #define BITNUM(x) ((x) % TBM_BITS_PER_BITMAPWORD) diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index c4e381391ef..3f8be96987f 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -630,6 +630,7 @@ static bool is_table_distributed(Oid relid) { HeapTuple gp_policy_tuple; + char relpersistence; gp_policy_tuple = SearchSysCache1(GPPOLICYID, ObjectIdGetDatum(relid)); if (HeapTupleIsValid(gp_policy_tuple)) diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 1fcd69d670b..aafa796725d 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -637,7 +637,7 @@ static Node *makeIsNotDistinctFromNode(Node *expr, int position); EXCLUDE EXCLUDING EXCLUSIVE EXECUTE EXISTS EXPLAIN EXTENSION EXTERNAL EXTRACT - FALSE_P FAMILY FETCH FILTER FIRST_P FLOAT_P FOLLOWING FOR + FALSE_P FAMILY FAST FETCH FILTER FIRST_P FLOAT_P FOLLOWING FOR FORCE FOREIGN FORWARD FREEZE FROM FULL FUNCTION FUNCTIONS GLOBAL GRANT GRANTED GREATEST GROUP_P @@ -4249,6 +4249,8 @@ OptTemp: TEMPORARY { $$ = RELPERSISTENCE_TEMP; } | TEMP { $$ = RELPERSISTENCE_TEMP; } | LOCAL TEMPORARY { $$ = RELPERSISTENCE_TEMP; } | LOCAL TEMP { $$ = RELPERSISTENCE_TEMP; } + | FAST TEMPORARY { $$ = RELPERSISTENCE_FAST_TEMP; } + | FAST TEMP { $$ = RELPERSISTENCE_FAST_TEMP; } | GLOBAL TEMPORARY { ereport(WARNING, @@ -12297,6 +12299,16 @@ OptTempTableName: $$ = $4; $$->relpersistence = RELPERSISTENCE_TEMP; } + | FAST TEMPORARY opt_table qualified_name + { + $$ = $4; + $$->relpersistence = RELPERSISTENCE_FAST_TEMP; + } + | FAST TEMP opt_table qualified_name + { + $$ = $4; + $$->relpersistence = 
RELPERSISTENCE_FAST_TEMP; + } | GLOBAL TEMPORARY opt_table qualified_name { ereport(WARNING, @@ -15818,6 +15830,7 @@ unreserved_keyword: | EXTENSION | EXTERNAL | FAMILY + | FAST | FIELDS | FILL | FILTER diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c index f114504d835..2ebb07b1224 100644 --- a/src/backend/parser/parse_relation.c +++ b/src/backend/parser/parse_relation.c @@ -3391,7 +3391,7 @@ isQueryUsingTempRelation_walker(Node *node, void *context) char relpersistence = rel->rd_rel->relpersistence; heap_close(rel, AccessShareLock); - if (relpersistence == RELPERSISTENCE_TEMP) + if (relpersistence == RELPERSISTENCE_TEMP || relpersistence == RELPERSISTENCE_FAST_TEMP) return true; } } diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c index b3493b1f9d2..8994b9eda10 100644 --- a/src/backend/parser/parse_utilcmd.c +++ b/src/backend/parser/parse_utilcmd.c @@ -223,7 +223,8 @@ transformCreateStmt(CreateStmt *stmt, const char *queryString, bool createPartit * specified to be in pg_temp, so no need for anything extra in that case. */ if (stmt->relation->schemaname == NULL - && stmt->relation->relpersistence != RELPERSISTENCE_TEMP) + && stmt->relation->relpersistence != RELPERSISTENCE_TEMP + && stmt->relation->relpersistence != RELPERSISTENCE_FAST_TEMP) stmt->relation->schemaname = get_namespace_name(namespaceid); /* Set up pstate and CreateStmtContext */ diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index 12231d66c39..8cec95148eb 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -2143,7 +2143,8 @@ do_autovacuum(void) * Check if it is a temp table (presumably, of some other backend's). * We cannot safely process other backends' temp tables. 
*/ - if (classForm->relpersistence == RELPERSISTENCE_TEMP) + if (classForm->relpersistence == RELPERSISTENCE_TEMP || + classForm->relpersistence == RELPERSISTENCE_FAST_TEMP) { int backendID; PGPROC *proc; @@ -2256,7 +2257,8 @@ do_autovacuum(void) /* * We cannot safely process other backends' temp tables, so skip 'em. */ - if (classForm->relpersistence == RELPERSISTENCE_TEMP) + if (classForm->relpersistence == RELPERSISTENCE_TEMP || + classForm->relpersistence == RELPERSISTENCE_FAST_TEMP) continue; relid = HeapTupleGetOid(tuple); diff --git a/src/backend/utils/adt/dbsize.c b/src/backend/utils/adt/dbsize.c index 4a7b329e032..61e3be20bdc 100644 --- a/src/backend/utils/adt/dbsize.c +++ b/src/backend/utils/adt/dbsize.c @@ -1056,6 +1056,7 @@ pg_relation_filepath(PG_FUNCTION_ARGS) backend = InvalidBackendId; break; case RELPERSISTENCE_TEMP: + case RELPERSISTENCE_FAST_TEMP: if (isTempOrToastNamespace(relform->relnamespace)) backend = MyBackendId; else diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 062b6cc2a02..26b63efc2d8 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -927,6 +927,7 @@ RelationBuildDesc(Oid targetRelId, bool insertIt) relation->rd_islocaltemp = false; break; case RELPERSISTENCE_TEMP: + case RELPERSISTENCE_FAST_TEMP: if (isTempOrToastNamespace(relation->rd_rel->relnamespace)) { relation->rd_backend = TempRelBackendId; @@ -1931,6 +1932,7 @@ RelationReloadIndexInfo(Relation relation) RelationGetRelid(relation)); relp = (Form_pg_class) GETSTRUCT(pg_class_tuple); memcpy(relation->rd_rel, relp, CLASS_TUPLE_SIZE); + /* Reload reloptions in case they changed */ if (relation->rd_options) pfree(relation->rd_options); @@ -3066,6 +3068,7 @@ RelationBuildLocalRelation(const char *relname, rel->rd_islocaltemp = false; break; case RELPERSISTENCE_TEMP: + case RELPERSISTENCE_FAST_TEMP: Assert(isTempOrToastNamespace(relnamespace)); rel->rd_backend = TempRelBackendId; rel->rd_islocaltemp 
= true; diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c index 269bae1bbfb..d856cae617f 100644 --- a/src/bin/psql/tab-complete.c +++ b/src/bin/psql/tab-complete.c @@ -761,6 +761,7 @@ static const pgsql_thing_t words_after_create[] = { {"DOMAIN", NULL, &Query_for_list_of_domains}, {"EVENT TRIGGER", NULL, NULL}, {"EXTENSION", Query_for_list_of_extensions}, + {"FAST TEMP", NULL, NULL, THING_NO_DROP}, /* for CREATE FAST TEMP TABLE ... */ {"FOREIGN DATA WRAPPER", NULL, NULL}, {"FOREIGN TABLE", NULL, NULL}, {"FUNCTION", NULL, &Query_for_list_of_functions}, diff --git a/src/include/access/fasttab.h b/src/include/access/fasttab.h new file mode 100644 index 00000000000..4be68ca961a --- /dev/null +++ b/src/include/access/fasttab.h @@ -0,0 +1,102 @@ +/*------------------------------------------------------------------------- + * + * fasttab.h + * virtual catalog and fast temporary tables + * + * FOR INTERNAL USAGE ONLY. Backward compatibility is not guaranteed. + * Don't use in extensions! + * + * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/fasttab.h + * + *------------------------------------------------------------------------- + */ + +#ifndef FASTTAB_H +#define FASTTAB_H + +#include "c.h" +#include "postgres_ext.h" +#include "access/htup.h" +#include "access/heapam.h" +#include "access/sdir.h" +#include "access/genam.h" +#include "catalog/indexing.h" +#include "storage/itemptr.h" +#include "utils/relcache.h" + +/* + * Flag stored in ItemPointerData.ip_posid to mark tuple as virtual. We can + * safely store a flag in higher bits of ip_posid since its maximum value is + * very limited. See MaxHeapTuplesPerPage. + * + * This constant had better not be too large since MAX_TUPLES_PER_PAGE depends on + * its value. 
+ */ +#define FASTTAB_ITEM_POINTER_BIT 0x0800 + +/* Determine whether ItemPointer is virtual */ +#define IsFasttabItemPointer(ptr) \ + ( ((ptr)->ip_posid & FASTTAB_ITEM_POINTER_BIT) != 0 ) + +typedef struct FasttabIndexMethodsData FasttabIndexMethodsData; + +typedef FasttabIndexMethodsData const *FasttabIndexMethods; + +extern bool IsFasttabHandledRelationId(Oid relId); + +extern bool IsFasttabHandledIndexId(Oid indexId); + +extern void fasttab_set_relpersistence_hint(char relpersistence); + +extern void fasttab_clear_relpersistence_hint(void); + +extern void fasttab_begin_transaction(void); + +extern void fasttab_end_transaction(void); + +extern void fasttab_abort_transaction(void); + +extern void fasttab_define_savepoint(const char *name); + +extern void fasttab_rollback_to_savepoint(const char *name); + +extern void fasttab_beginscan(HeapScanDesc scan); + +extern HeapTuple fasttab_getnext(HeapScanDesc scan, ScanDirection direction); + +extern bool fasttab_hot_search_buffer(ItemPointer tid, Relation relation, + HeapTuple heapTuple, bool *all_dead, bool *result); + +extern bool fasttab_insert(Relation relation, HeapTuple tup, HeapTuple heaptup, + Oid *result); + +extern bool fasttab_delete(Relation relation, ItemPointer tid); + +extern bool fasttab_update(Relation relation, ItemPointer otid, + HeapTuple newtup); + +extern bool fasttab_inplace_update(Relation relation, HeapTuple tuple); + +extern bool fasttab_index_insert(Relation indexRelation, + ItemPointer heap_t_ctid, bool *result); + +extern void fasttab_index_beginscan(IndexScanDesc scan); + +extern void fasttab_index_rescan(IndexScanDesc scan, ScanKey keys, int nkeys, + ScanKey orderbys, int norderbys); + +extern bool fasttab_simple_heap_fetch(Relation relation, Snapshot snapshot, + HeapTuple tuple); + +extern bool fasttab_index_getnext_tid_merge(IndexScanDesc scan, + ScanDirection direction); + +extern bool fasttab_index_getbitmap(IndexScanDesc scan, TIDBitmap *bitmap, + int64 *result); + +extern void 
fasttab_index_endscan(IndexScanDesc scan); + +#endif /* FASTTAB_H */ diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h index 0732bf7f678..abd43e66095 100644 --- a/src/include/access/relscan.h +++ b/src/include/access/relscan.h @@ -19,6 +19,7 @@ #include "access/htup_details.h" #include "access/itup.h" #include "access/tupdesc.h" +#include "access/fasttab.h" #include "access/formatter.h" @@ -52,6 +53,8 @@ typedef struct HeapScanDescData int rs_cindex; /* current tuple's index in vistuples */ int rs_ntuples; /* number of visible tuples on page */ OffsetNumber rs_vistuples[MaxHeapTuplesPerPage]; /* their offsets */ + + dlist_node *rs_curr_inmem_tupnode; /* current virtual tuple, or NULL */ } HeapScanDescData; /* @@ -92,6 +95,19 @@ typedef struct IndexScanDescData /* state data for traversing HOT chains in index_getnext */ bool xs_continue_hot; /* T if must keep walking HOT chain */ + + /* sorted list of virtual tuples that should be returned during a scan */ + dlist_head xs_inmem_tuplist; + /* memoized corresponding FasttabIndexMethodsTable entry */ + FasttabIndexMethods indexMethods; + /* whether xs_inmem_tuplist was initialized */ + bool xs_inmem_tuplist_init_done; + /* whether xs_inmem_tuplist contains a regular tuple */ + bool xs_regular_tuple_enqueued; + /* whether internal regular scan was finished */ + bool xs_regular_scan_finished; + /* whether information that scan was finished was returned */ + bool xs_scan_finish_returned; } IndexScanDescData; /* diff --git a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h index 56f7592056a..ad6c4adfa55 100644 --- a/src/include/catalog/pg_class.h +++ b/src/include/catalog/pg_class.h @@ -174,9 +174,11 @@ DESCR(""); #define RELKIND_AOVISIMAP 'M' /* AO visibility map */ #define RELKIND_YEZZEYINDEX 'Y' /* yezzey external storage map */ +#define RELPERSISTENCE_UNDEFINED '?' 
/* invalid relpersistence value */ #define RELPERSISTENCE_PERMANENT 'p' /* regular table */ #define RELPERSISTENCE_UNLOGGED 'u' /* unlogged permanent table */ #define RELPERSISTENCE_TEMP 't' /* temporary table */ +#define RELPERSISTENCE_FAST_TEMP 'f' /* fast temporary table */ /* default selection for replica identity (primary key or nothing) */ #define REPLICA_IDENTITY_DEFAULT 'd' diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h index b6705abe3df..1cad556a28a 100644 --- a/src/include/parser/kwlist.h +++ b/src/include/parser/kwlist.h @@ -170,6 +170,7 @@ PG_KEYWORD("external", EXTERNAL, UNRESERVED_KEYWORD) PG_KEYWORD("extract", EXTRACT, COL_NAME_KEYWORD) PG_KEYWORD("false", FALSE_P, RESERVED_KEYWORD) PG_KEYWORD("family", FAMILY, UNRESERVED_KEYWORD) +PG_KEYWORD("fast", FAST, UNRESERVED_KEYWORD) PG_KEYWORD("fetch", FETCH, RESERVED_KEYWORD) PG_KEYWORD("fields", FIELDS, UNRESERVED_KEYWORD) PG_KEYWORD("fill", FILL, UNRESERVED_KEYWORD) diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index 24d39757f4c..bacc7aa27ac 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -507,7 +507,7 @@ typedef struct ViewOptions * RelationUsesLocalBuffers * True if relation's pages are stored in local buffers. * - * In GPDB, we do not use local buffers for temp tables because segmates need + * In GPDB, we do not use local buffers for temp tables because segments need * to share temp table contents. Currently, there is no other reason to use * local buffers. */ @@ -518,7 +518,7 @@ typedef struct ViewOptions * True if relation's catalog entries live in a private namespace. 
*/ #define RelationUsesTempNamespace(relation) \ - ((relation)->rd_rel->relpersistence == RELPERSISTENCE_TEMP) + (((relation)->rd_rel->relpersistence == RELPERSISTENCE_TEMP) || ((relation)->rd_rel->relpersistence == RELPERSISTENCE_FAST_TEMP)) /* * RELATION_IS_LOCAL @@ -539,7 +539,7 @@ typedef struct ViewOptions * Beware of multiple eval of argument */ #define RELATION_IS_OTHER_TEMP(relation) \ - ((relation)->rd_rel->relpersistence == RELPERSISTENCE_TEMP && \ + ((((relation)->rd_rel->relpersistence == RELPERSISTENCE_TEMP) || ((relation)->rd_rel->relpersistence == RELPERSISTENCE_FAST_TEMP)) && \ !(relation)->rd_islocaltemp) diff --git a/src/test/regress/expected/fast_temp.out b/src/test/regress/expected/fast_temp.out new file mode 100644 index 00000000000..91945d3ab7b --- /dev/null +++ b/src/test/regress/expected/fast_temp.out @@ -0,0 +1,406 @@ +-- +-- FAST TEMP +-- Test fast temporary tables +-- +-- basic test +CREATE FAST TEMP TABLE fasttab_test1(x int, s text); +INSERT INTO fasttab_test1 VALUES (1, 'aaa'), (2, 'bbb'), (3, 'ccc'), (4, 'ddd'); +UPDATE fasttab_test1 SET s = 'eee' WHERE x = 4; +UPDATE fasttab_test1 SET x = 5 WHERE s = 'bbb'; +DELETE FROM fasttab_test1 WHERE x = 3; +SELECT * FROM fasttab_test1 ORDER BY x; + x | s +---+----- + 1 | aaa + 4 | eee + 5 | bbb +(3 rows) + +DROP TABLE fasttab_test1; +-- kind of load test +do $$ +declare + count_fast_table integer = 150; + count_attr integer = 20; + i integer; + j integer; + t_sql text; +begin + for i in 1 .. count_fast_table + loop + t_sql = 'CREATE FAST TEMP TABLE fast_table_' || i :: text; + t_sql = t_sql || ' ('; + for j in 1 ..
count_attr + loop + t_sql = t_sql || ' attr' || j || ' text'; + if j <> count_attr then + t_sql = t_sql || ', '; + end if; + end loop; + t_sql = t_sql || ' );'; + execute t_sql; + -- raise info 't_sql %', t_sql; + end loop; +end $$; +SELECT * FROM fast_table_1; + attr1 | attr2 | attr3 | attr4 | attr5 | attr6 | attr7 | attr8 | attr9 | attr10 | attr11 | attr12 | attr13 | attr14 | attr15 | attr16 | attr17 | attr18 | attr19 | attr20 +-------+-------+-------+-------+-------+-------+-------+-------+-------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+-------- +(0 rows) + +-- test bitmap index scan +SELECT count(*) FROM pg_class WHERE relname = 'fast_table_1' OR relname = 'fast_table_2'; + count +------- + 2 +(1 row) + +-- create / delete / create test +CREATE FAST TEMP TABLE fasttab_test1(x int, s text); +-- check index only scan +SELECT COUNT(*) FROM pg_class WHERE relname = 'fasttab_test1'; + count +------- + 1 +(1 row) + +SELECT relname FROM pg_class WHERE relname = 'fasttab_test1'; + relname +--------------- + fasttab_test1 +(1 row) + +DROP TABLE fasttab_test1; +-- select from non-existend temp table +SELECT COUNT(*) FROM fasttab_test1; +ERROR: relation "fasttab_test1" does not exist +LINE 1: SELECT COUNT(*) FROM fasttab_test1; + ^ +CREATE FAST TEMP TABLE fasttab_test1(x int, s text); +CREATE FAST TEMP TABLE fasttab_test2(x int, s text); +SELECT * FROM fasttab_test1; + x | s +---+--- +(0 rows) + +-- check that ALTER works as expected +ALTER TABLE fasttab_test1 ADD COLUMN y int; +SELECT * FROM fasttab_test1; + x | s | y +---+---+--- +(0 rows) + +ALTER TABLE fasttab_test1 ADD COLUMN z int; +SELECT * FROM fasttab_test1; + x | s | y | z +---+---+---+--- +(0 rows) + +ALTER TABLE fasttab_test1 DROP COLUMN x; +SELECT * FROM fasttab_test1; + s | y | z +---+---+--- +(0 rows) + +ALTER TABLE fasttab_test1 DROP COLUMN y; +SELECT * FROM fasttab_test1; + s | z +---+--- +(0 rows) + +-- check tat ALTER TABLE ... RENAME TO ... 
works as expected +CREATE FAST TEMP TABLE fast_temp_1 (x int); +ALTER TABLE fast_temp_1 RENAME TO fast_temp_2; +CREATE FAST TEMP TABLE fast_temp_1 (x int); +DROP TABLE fast_temp_1; +DROP TABLE fast_temp_2; +-- test transactions and savepoints +BEGIN; +INSERT INTO fasttab_test2 VALUES (1, 'aaa'), (2, 'bbb'); +SELECT * FROM fasttab_test2; + x | s +---+----- + 1 | aaa + 2 | bbb +(2 rows) + +ROLLBACK; +SELECT * FROM fasttab_test2; + x | s +---+--- +(0 rows) + +BEGIN; +INSERT INTO fasttab_test2 VALUES (3, 'ccc'), (4, 'ddd'); +SELECT * FROM fasttab_test2; + x | s +---+----- + 3 | ccc + 4 | ddd +(2 rows) + +COMMIT; +SELECT * FROM fasttab_test2; + x | s +---+----- + 3 | ccc + 4 | ddd +(2 rows) + +BEGIN; +SAVEPOINT sp1; +ALTER TABLE fasttab_test2 ADD COLUMN y int; +SELECT * FROM fasttab_test2; + x | s | y +---+-----+--- + 3 | ccc | + 4 | ddd | +(2 rows) + +SAVEPOINT sp2; +INSERT INTO fasttab_test2 VALUES (5, 'eee', 6); +SELECT * FROM fasttab_test2; + x | s | y +---+-----+--- + 3 | ccc | + 4 | ddd | + 5 | eee | 6 +(3 rows) + +ROLLBACK TO SAVEPOINT sp2; +INSERT INTO fasttab_test2 VALUES (55, 'EEE', 66); +SELECT * FROM fasttab_test2; + x | s | y +----+-----+---- + 3 | ccc | + 4 | ddd | + 55 | EEE | 66 +(3 rows) + +ROLLBACK TO SAVEPOINT sp2; +SELECT * FROM fasttab_test2; + x | s | y +---+-----+--- + 3 | ccc | + 4 | ddd | +(2 rows) + +COMMIT; +DROP TABLE fasttab_test1; +DROP TABLE fasttab_test2; +-- test that exceptions are handled properly +DO $$ +DECLARE +BEGIN + CREATE FAST TEMP TABLE fast_exception_test(x int, y int, z int); + RAISE EXCEPTION 'test error'; +END $$; +ERROR: test error +CONTEXT: PL/pgSQL function inline_code_block line 5 at RAISE +CREATE FAST TEMP TABLE fast_exception_test(x int, y int, z int); +DROP TABLE fast_exception_test; +-- test that inheritance works as expected +-- OK: +CREATE TABLE cities (name text, population float, altitude int); +CREATE TABLE capitals (state char(2)) INHERITS (cities); +DROP TABLE capitals; +DROP TABLE cities; +-- OK: +CREATE 
TABLE cities2 (name text, population float, altitude int); +CREATE FAST TEMPORARY TABLE capitals2 (state char(2)) INHERITS (cities2); +INSERT INTO capitals2 VALUES ('Moscow', 123.45, 789, 'RU'); +INSERT INTO capitals2 VALUES ('Paris', 543.21, 987, 'FR'); +SELECT * FROM capitals2; + name | population | altitude | state +--------+------------+----------+------- + Moscow | 123.45 | 789 | RU + Paris | 543.21 | 987 | FR +(2 rows) + +SELECT * FROM cities2; + name | population | altitude +--------+------------+---------- + Moscow | 123.45 | 789 + Paris | 543.21 | 987 +(2 rows) + +DELETE FROM cities2 WHERE name = 'Moscow'; +SELECT * FROM capitals2; + name | population | altitude | state +-------+------------+----------+------- + Paris | 543.21 | 987 | FR +(1 row) + +SELECT * FROM cities2; + name | population | altitude +-------+------------+---------- + Paris | 543.21 | 987 +(1 row) + +DROP TABLE capitals2; +DROP TABLE cities2; +-- ERROR: +CREATE FAST TEMPORARY TABLE cities3 (name text, population float, altitude int); +-- cannot inherit from temporary relation "cities3" +CREATE TABLE capitals3 (state char(2)) INHERITS (cities3); +ERROR: cannot inherit from temporary relation "cities3" +DROP TABLE cities3; +-- OK: +CREATE FAST TEMPORARY TABLE cities4 (name text, population float, altitude int); +CREATE FAST TEMPORARY TABLE capitals4 (state char(2)) INHERITS (cities4); +INSERT INTO capitals4 VALUES ('Moscow', 123.45, 789, 'RU'); +INSERT INTO capitals4 VALUES ('Paris', 543.21, 987, 'FR'); +SELECT * FROM capitals4; + name | population | altitude | state +--------+------------+----------+------- + Moscow | 123.45 | 789 | RU + Paris | 543.21 | 987 | FR +(2 rows) + +SELECT * FROM cities4; + name | population | altitude +--------+------------+---------- + Moscow | 123.45 | 789 + Paris | 543.21 | 987 +(2 rows) + +DELETE FROM cities4 WHERE name = 'Moscow'; +SELECT * FROM capitals4; + name | population | altitude | state +-------+------------+----------+------- + Paris | 543.21 | 
987 | FR +(1 row) + +SELECT * FROM cities4; + name | population | altitude +-------+------------+---------- + Paris | 543.21 | 987 +(1 row) + +DROP TABLE capitals4; +DROP TABLE cities4; +-- OK: +CREATE TEMPORARY TABLE cities5 (name text, population float, altitude int); +CREATE FAST TEMPORARY TABLE capitals5 (state char(2)) INHERITS (cities5); +INSERT INTO capitals5 VALUES ('Moscow', 123.45, 789, 'RU'); +INSERT INTO capitals5 VALUES ('Paris', 543.21, 987, 'FR'); +SELECT * FROM capitals5; + name | population | altitude | state +--------+------------+----------+------- + Moscow | 123.45 | 789 | RU + Paris | 543.21 | 987 | FR +(2 rows) + +SELECT * FROM cities5; + name | population | altitude +--------+------------+---------- + Moscow | 123.45 | 789 + Paris | 543.21 | 987 +(2 rows) + +DELETE FROM cities5 WHERE name = 'Moscow'; +SELECT * FROM capitals5; + name | population | altitude | state +-------+------------+----------+------- + Paris | 543.21 | 987 | FR +(1 row) + +SELECT * FROM cities5; + name | population | altitude +-------+------------+---------- + Paris | 543.21 | 987 +(1 row) + +DROP TABLE capitals5; +DROP TABLE cities5; +-- OK: +CREATE FAST TEMPORARY TABLE cities6 (name text, population float, altitude int); +CREATE TEMPORARY TABLE capitals6 (state char(2)) INHERITS (cities6); +INSERT INTO capitals6 VALUES ('Moscow', 123.45, 789, 'RU'); +INSERT INTO capitals6 VALUES ('Paris', 543.21, 987, 'FR'); +SELECT * FROM capitals6; + name | population | altitude | state +--------+------------+----------+------- + Moscow | 123.45 | 789 | RU + Paris | 543.21 | 987 | FR +(2 rows) + +SELECT * FROM cities6; + name | population | altitude +--------+------------+---------- + Moscow | 123.45 | 789 + Paris | 543.21 | 987 +(2 rows) + +DELETE FROM cities6 WHERE name = 'Moscow'; +SELECT * FROM capitals6; + name | population | altitude | state +-------+------------+----------+------- + Paris | 543.21 | 987 | FR +(1 row) + +SELECT * FROM cities6; + name | population | altitude 
+-------+------------+---------- + Paris | 543.21 | 987 +(1 row) + +DROP TABLE capitals6; +DROP TABLE cities6; +-- test index-only scan +CREATE FAST TEMP TABLE fasttab_unique_prefix_beta(x int); +CREATE TABLE fasttab_unique_prefix_alpha(x int); +CREATE FAST TEMP TABLE fasttab_unique_prefix_delta(x int); +CREATE TABLE fasttab_unique_prefix_epsilon(x int); +CREATE TABLE fasttab_unique_prefix_gamma(x int); +SELECT relname FROM pg_class WHERE relname > 'fasttab_unique_prefix_' ORDER BY relname LIMIT 5; + relname +------------------------------- + fasttab_unique_prefix_alpha + fasttab_unique_prefix_beta + fasttab_unique_prefix_delta + fasttab_unique_prefix_epsilon + fasttab_unique_prefix_gamma +(5 rows) + +DROP TABLE fasttab_unique_prefix_alpha; +DROP TABLE fasttab_unique_prefix_beta; +DROP TABLE fasttab_unique_prefix_gamma; +DROP TABLE fasttab_unique_prefix_delta; +DROP TABLE fasttab_unique_prefix_epsilon; +-- test VACUUM / VACUUM FULL +VACUUM; +VACUUM FULL; +SELECT * FROM fast_table_1; + attr1 | attr2 | attr3 | attr4 | attr5 | attr6 | attr7 | attr8 | attr9 | attr10 | attr11 | attr12 | attr13 | attr14 | attr15 | attr16 | attr17 | attr18 | attr19 | attr20 +-------+-------+-------+-------+-------+-------+-------+-------+-------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+-------- +(0 rows) + +-- test ANALYZE +CREATE FAST TEMP TABLE fasttab_analyze_test(x int, s text); +INSERT INTO fasttab_analyze_test SELECT x, '--> ' || x FROM generate_series(1,100) as x; +ANALYZE fasttab_analyze_test; +SELECT count(*) FROM pg_statistic WHERE starelid = (SELECT oid FROM pg_class WHERE relname = 'fasttab_analyze_test'); + count +------- + 2 +(1 row) + +DROP TABLE fasttab_analyze_test; +SELECT count(*) FROM pg_statistic WHERE starelid = (SELECT oid FROM pg_class WHERE relname = 'fasttab_analyze_test'); + count +------- + 0 +(1 row) + +-- cleanup after load test +do $$ +declare + count_fast_table integer = 150; + t_sql text; +begin + for i in 1 
.. count_fast_table + loop + t_sql = 'DROP TABLE fast_table_' || i || ';'; + execute t_sql; + end loop; +end $$; diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index 37e20c6222e..943ab9b1ca1 100755 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -162,6 +162,11 @@ test: plancache limit plpgsql copy2 temp domain rangefuncs prepare without_oid c # large objects are not supported by GPDB # test: largeobject +# ---------- +# Another group of parallel tests +# ---------- +test: fast_temp + # event triggers cannot run concurrently with any test that runs DDL test: event_trigger diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule index c54288b4c8a..db61c07ef69 100755 --- a/src/test/regress/serial_schedule +++ b/src/test/regress/serial_schedule @@ -154,3 +154,4 @@ test: xml test: event_trigger test: stats test: createdb +test: fast_temp diff --git a/src/test/regress/sql/fast_temp.sql b/src/test/regress/sql/fast_temp.sql new file mode 100644 index 00000000000..e4fef5e89c5 --- /dev/null +++ b/src/test/regress/sql/fast_temp.sql @@ -0,0 +1,265 @@ +-- +-- FAST TEMP +-- Test fast temporary tables +-- + +-- basic test + +CREATE FAST TEMP TABLE fasttab_test1(x int, s text); + +INSERT INTO fasttab_test1 VALUES (1, 'aaa'), (2, 'bbb'), (3, 'ccc'), (4, 'ddd'); + +UPDATE fasttab_test1 SET s = 'eee' WHERE x = 4; + +UPDATE fasttab_test1 SET x = 5 WHERE s = 'bbb'; + +DELETE FROM fasttab_test1 WHERE x = 3; + +SELECT * FROM fasttab_test1 ORDER BY x; + +DROP TABLE fasttab_test1; + +-- kind of load test + +do $$ +declare + count_fast_table integer = 150; + count_attr integer = 20; + i integer; + j integer; + t_sql text; +begin + for i in 1 .. count_fast_table + loop + t_sql = 'CREATE FAST TEMP TABLE fast_table_' || i :: text; + t_sql = t_sql || ' ('; + for j in 1 .. 
count_attr + loop + t_sql = t_sql || ' attr' || j || ' text'; + if j <> count_attr then + t_sql = t_sql || ', '; + end if; + end loop; + t_sql = t_sql || ' );'; + execute t_sql; + -- raise info 't_sql %', t_sql; + end loop; +end $$; + +SELECT * FROM fast_table_1; + +-- test bitmap index scan + +SELECT count(*) FROM pg_class WHERE relname = 'fast_table_1' OR relname = 'fast_table_2'; + +-- create / delete / create test + +CREATE FAST TEMP TABLE fasttab_test1(x int, s text); + +-- check index only scan + +SELECT COUNT(*) FROM pg_class WHERE relname = 'fasttab_test1'; +SELECT relname FROM pg_class WHERE relname = 'fasttab_test1'; + +DROP TABLE fasttab_test1; + +-- select from non-existend temp table + +SELECT COUNT(*) FROM fasttab_test1; + +CREATE FAST TEMP TABLE fasttab_test1(x int, s text); +CREATE FAST TEMP TABLE fasttab_test2(x int, s text); +SELECT * FROM fasttab_test1; + +-- check that ALTER works as expected + +ALTER TABLE fasttab_test1 ADD COLUMN y int; +SELECT * FROM fasttab_test1; + +ALTER TABLE fasttab_test1 ADD COLUMN z int; +SELECT * FROM fasttab_test1; + +ALTER TABLE fasttab_test1 DROP COLUMN x; +SELECT * FROM fasttab_test1; + +ALTER TABLE fasttab_test1 DROP COLUMN y; +SELECT * FROM fasttab_test1; + +-- check tat ALTER TABLE ... RENAME TO ... 
works as expected + +CREATE FAST TEMP TABLE fast_temp_1 (x int); +ALTER TABLE fast_temp_1 RENAME TO fast_temp_2; +CREATE FAST TEMP TABLE fast_temp_1 (x int); +DROP TABLE fast_temp_1; +DROP TABLE fast_temp_2; + +-- test transactions and savepoints + +BEGIN; + +INSERT INTO fasttab_test2 VALUES (1, 'aaa'), (2, 'bbb'); +SELECT * FROM fasttab_test2; + +ROLLBACK; + +SELECT * FROM fasttab_test2; + +BEGIN; + +INSERT INTO fasttab_test2 VALUES (3, 'ccc'), (4, 'ddd'); +SELECT * FROM fasttab_test2; + +COMMIT; + +SELECT * FROM fasttab_test2; + + +BEGIN; + +SAVEPOINT sp1; + +ALTER TABLE fasttab_test2 ADD COLUMN y int; +SELECT * FROM fasttab_test2; + +SAVEPOINT sp2; + +INSERT INTO fasttab_test2 VALUES (5, 'eee', 6); +SELECT * FROM fasttab_test2; +ROLLBACK TO SAVEPOINT sp2; + +INSERT INTO fasttab_test2 VALUES (55, 'EEE', 66); +SELECT * FROM fasttab_test2; +ROLLBACK TO SAVEPOINT sp2; + +SELECT * FROM fasttab_test2; +COMMIT; + +DROP TABLE fasttab_test1; +DROP TABLE fasttab_test2; + +-- test that exceptions are handled properly + +DO $$ +DECLARE +BEGIN + CREATE FAST TEMP TABLE fast_exception_test(x int, y int, z int); + RAISE EXCEPTION 'test error'; +END $$; + +CREATE FAST TEMP TABLE fast_exception_test(x int, y int, z int); +DROP TABLE fast_exception_test; + +-- test that inheritance works as expected +-- OK: + +CREATE TABLE cities (name text, population float, altitude int); +CREATE TABLE capitals (state char(2)) INHERITS (cities); +DROP TABLE capitals; +DROP TABLE cities; + +-- OK: + +CREATE TABLE cities2 (name text, population float, altitude int); +CREATE FAST TEMPORARY TABLE capitals2 (state char(2)) INHERITS (cities2); +INSERT INTO capitals2 VALUES ('Moscow', 123.45, 789, 'RU'); +INSERT INTO capitals2 VALUES ('Paris', 543.21, 987, 'FR'); +SELECT * FROM capitals2; +SELECT * FROM cities2; +DELETE FROM cities2 WHERE name = 'Moscow'; +SELECT * FROM capitals2; +SELECT * FROM cities2; +DROP TABLE capitals2; +DROP TABLE cities2; + +-- ERROR: + +CREATE FAST TEMPORARY TABLE cities3 
(name text, population float, altitude int); +-- cannot inherit from temporary relation "cities3" +CREATE TABLE capitals3 (state char(2)) INHERITS (cities3); +DROP TABLE cities3; + +-- OK: + +CREATE FAST TEMPORARY TABLE cities4 (name text, population float, altitude int); +CREATE FAST TEMPORARY TABLE capitals4 (state char(2)) INHERITS (cities4); +INSERT INTO capitals4 VALUES ('Moscow', 123.45, 789, 'RU'); +INSERT INTO capitals4 VALUES ('Paris', 543.21, 987, 'FR'); +SELECT * FROM capitals4; +SELECT * FROM cities4; +DELETE FROM cities4 WHERE name = 'Moscow'; +SELECT * FROM capitals4; +SELECT * FROM cities4; +DROP TABLE capitals4; +DROP TABLE cities4; + +-- OK: + +CREATE TEMPORARY TABLE cities5 (name text, population float, altitude int); +CREATE FAST TEMPORARY TABLE capitals5 (state char(2)) INHERITS (cities5); +INSERT INTO capitals5 VALUES ('Moscow', 123.45, 789, 'RU'); +INSERT INTO capitals5 VALUES ('Paris', 543.21, 987, 'FR'); +SELECT * FROM capitals5; +SELECT * FROM cities5; +DELETE FROM cities5 WHERE name = 'Moscow'; +SELECT * FROM capitals5; +SELECT * FROM cities5; +DROP TABLE capitals5; +DROP TABLE cities5; + +-- OK: + +CREATE FAST TEMPORARY TABLE cities6 (name text, population float, altitude int); +CREATE TEMPORARY TABLE capitals6 (state char(2)) INHERITS (cities6); +INSERT INTO capitals6 VALUES ('Moscow', 123.45, 789, 'RU'); +INSERT INTO capitals6 VALUES ('Paris', 543.21, 987, 'FR'); +SELECT * FROM capitals6; +SELECT * FROM cities6; +DELETE FROM cities6 WHERE name = 'Moscow'; +SELECT * FROM capitals6; +SELECT * FROM cities6; +DROP TABLE capitals6; +DROP TABLE cities6; + +-- test index-only scan + +CREATE FAST TEMP TABLE fasttab_unique_prefix_beta(x int); +CREATE TABLE fasttab_unique_prefix_alpha(x int); +CREATE FAST TEMP TABLE fasttab_unique_prefix_delta(x int); +CREATE TABLE fasttab_unique_prefix_epsilon(x int); +CREATE TABLE fasttab_unique_prefix_gamma(x int); +SELECT relname FROM pg_class WHERE relname > 'fasttab_unique_prefix_' ORDER BY relname LIMIT 5; 
+DROP TABLE fasttab_unique_prefix_alpha; +DROP TABLE fasttab_unique_prefix_beta; +DROP TABLE fasttab_unique_prefix_gamma; +DROP TABLE fasttab_unique_prefix_delta; +DROP TABLE fasttab_unique_prefix_epsilon; + +-- test VACUUM / VACUUM FULL + +VACUUM; +VACUUM FULL; +SELECT * FROM fast_table_1; + +-- test ANALYZE + +CREATE FAST TEMP TABLE fasttab_analyze_test(x int, s text); +INSERT INTO fasttab_analyze_test SELECT x, '--> ' || x FROM generate_series(1,100) as x; +ANALYZE fasttab_analyze_test; +SELECT count(*) FROM pg_statistic WHERE starelid = (SELECT oid FROM pg_class WHERE relname = 'fasttab_analyze_test'); +DROP TABLE fasttab_analyze_test; +SELECT count(*) FROM pg_statistic WHERE starelid = (SELECT oid FROM pg_class WHERE relname = 'fasttab_analyze_test'); + +-- cleanup after load test + +do $$ +declare + count_fast_table integer = 150; + t_sql text; +begin + for i in 1 .. count_fast_table + loop + t_sql = 'DROP TABLE fast_table_' || i || ';'; + execute t_sql; + end loop; +end $$; +