From dd25f0677101ab70de174949900741eb67cbb55a Mon Sep 17 00:00:00 2001
From: Steven Eker
Date: Fri, 6 Dec 2024 10:41:01 -0800
Subject: [PATCH] New allocation algorithm (#1166)

Replaced algorithm in class arena with one that uses mmap()'d address spaces
and demand paging for each semispace, and initializes the current semispace
of each arena at start time and the second semispace of each arena at swap
time to avoid relying on Undefined Behavior or having an extra test in
kore_arena_alloc().

---------

Co-authored-by: Dwight Guth
---
 include/runtime/arena.h     | 145 ++++++++++++++++-----
 runtime/alloc/arena.cpp     | 247 ++++++------------------------
 runtime/collect/collect.cpp |  17 +--
 3 files changed, 151 insertions(+), 258 deletions(-)

diff --git a/include/runtime/arena.h b/include/runtime/arena.h
index fc72155fb..3f71445e4 100644
--- a/include/runtime/arena.h
+++ b/include/runtime/arena.h
@@ -1,38 +1,49 @@
 #ifndef ARENA_H
 #define ARENA_H
 
+#include <algorithm>
 #include <cstddef>
+#include <cstdint>
 #include <sys/types.h>
+#include <utility>
 
 #include "runtime/alloc.h"
 
 extern "C" {
 
+size_t const HYPERBLOCK_SIZE = (size_t)BLOCK_SIZE * 1024 * 1024;
+
 // An arena can be used to allocate objects that can then be deallocated all at
 // once.
 class arena {
 public:
   arena(char id)
-      : allocation_semispace_id(id) { }
+      : allocation_semispace_id(id) {
+    initialize_semispace();
+  }
 
   // Allocates the requested number of bytes as a contiguous region and returns a
   // pointer to the first allocated byte.
-  // If called with requested size greater than the maximun single allocation
-  // size, the space is allocated in a general (not garbage collected pool).
   void *kore_arena_alloc(size_t requested);
 
   // Returns the address of the first byte that belongs in the given arena.
   // Returns 0 if nothing has been allocated ever in that arena.
-  char *arena_start_ptr() const;
+  char *arena_start_ptr() const {
+    return current_addr_ptr ? current_addr_ptr + sizeof(memory_block_header)
+                            : nullptr;
+  }
 
   // Returns a pointer to a location holding the address of last allocated
   // byte in the given arena plus 1.
   // This address is 0 if nothing has been allocated ever in that arena.
-  char **arena_end_ptr();
+  char **arena_end_ptr() { return &allocation_ptr; }
 
   // return the total number of allocatable bytes currently in the arena in its
   // active semispace.
-  size_t arena_size() const;
+  size_t arena_size() const {
+    update_num_blocks();
+    return BLOCK_SIZE * std::max(num_blocks, num_collection_blocks);
+  }
 
   // Clears the current allocation space by setting its start back to its first
   // block. It is used during garbage collection to effectively collect all of the
@@ -41,15 +52,18 @@ class arena {
 
   // Resizes the last allocation as long as the resize does not require a new
   // block allocation.
-  // Returns the address of the byte following the last newlly allocated byte when
-  // the resize succeeds, returns 0 otherwise.
-  void *arena_resize_last_alloc(ssize_t increase);
+  // Returns the address of the byte following the last newly allocated byte.
+  void *arena_resize_last_alloc(ssize_t increase) {
+    return (allocation_ptr += increase);
+  }
 
   // Returns the given arena's current collection semispace ID.
   // Each arena has 2 semispace IDs one equal to the arena ID and the other equal
   // to the 1's complement of the arena ID. At any time one of these semispaces
   // is used for allocation and the other is used for collection.
-  char get_arena_collection_semispace_id() const;
+  char get_arena_collection_semispace_id() const {
+    return ~allocation_semispace_id;
+  }
 
   // Exchanges the current allocation and collection semispaces and clears the new
   // current allocation semispace by setting its start back to its first block.
@@ -61,7 +75,7 @@ class arena {
   // by the blocks of that arena. This difference will include blocks containing
   // sentinel bytes. Undefined behavior will result if the pointers belong to
   // different arenas.
-  static ssize_t ptr_diff(char *ptr1, char *ptr2);
+  static ssize_t ptr_diff(char *ptr1, char *ptr2) { return ptr1 - ptr2; }
 
   // Given a starting pointer to an address allocated in an arena and a size in
   // bytes, this function returns a pointer to an address allocated in the
@@ -72,7 +86,10 @@ class arena {
   // 3rd argument: the address of last allocated byte in the arena plus 1
   // Return value: the address allocated in the arena after size bytes from the
   // starting pointer, or 0 if this is equal to the 3rd argument.
-  static char *move_ptr(char *ptr, size_t size, char const *arena_end_ptr);
+  static char *move_ptr(char *ptr, size_t size, char const *arena_end_ptr) {
+    char *next_ptr = ptr + size;
+    return (next_ptr == arena_end_ptr) ? 0 : next_ptr;
+  }
 
   // Returns the ID of the semispace where the given address was allocated.
   // The behavior is undefined if called with an address that has not been
@@ -80,34 +97,59 @@ class arena {
   static char get_arena_semispace_id_of_object(void *ptr);
 
 private:
-  struct memory_block_header {
-    char *next_block;
+  union memory_block_header {
+    //
+    // Currently the header just holds the semispace id. But we need it to be a
+    // multiple of sizeof(char*) for alignment purposes so we add a dummy char*.
+    //
     char semispace;
+    char *alignment_dummy;
   };
 
-  void fresh_block();
-  static memory_block_header *mem_block_header(void *ptr);
+  //
+  // We update the number of 1MB blocks actually written to, only when we need this value,
+  // or before a garbage collection rather than trying to determine when we write to a fresh block.
+  //
+  void update_num_blocks() const {
+    //
+    // Calculate how many 1M blocks of the current arena we used.
+    //
+    size_t num_used_blocks
+        = (allocation_ptr - current_addr_ptr - 1) / BLOCK_SIZE + 1;
+    if (num_used_blocks > num_blocks)
+      num_blocks = num_used_blocks;
+  }
+
+  void initialize_semispace();
 
-  // helper function for `kore_arena_alloc`. Do not call directly.
-  void *do_alloc_slow(size_t requested);
+  static memory_block_header *mem_block_header(void *ptr) {
+    uintptr_t address = reinterpret_cast<uintptr_t>(ptr);
+    return reinterpret_cast<memory_block_header *>(
+        (address - 1) & ~(HYPERBLOCK_SIZE - 1));
+  }
 
-  char *first_block; // beginning of first block
-  char *block; // where allocations are being made in current block
-  char *block_start; // start of current block
-  char *block_end; // 1 past end of current block
-  char *first_collection_block; // beginning of other semispace
-  size_t num_blocks; // number of blocks in current semispace
-  size_t num_collection_blocks; // number of blocks in other semispace
+  //
+  // Current semispace where allocations are being made.
+  //
+  char *current_addr_ptr; // pointer to start of current address space
+  char *allocation_ptr; // next available location in current semispace
+  char *tripwire; // allocating past this triggers slow allocation
+  mutable size_t
+      num_blocks; // notional number of BLOCK_SIZE blocks in current semispace
   char allocation_semispace_id; // id of current semispace
+  //
+  // Semispace where allocations will be made during and after garbage collection.
+  //
+  char *collection_addr_ptr
+      = nullptr; // pointer to start of collection address space
+  size_t num_collection_blocks
+      = 0; // notional number of BLOCK_SIZE blocks in collection semispace
 };
 
 // Macro to define a new arena with the given ID. Supports IDs ranging from 0 to
 // 127.
 #define REGISTER_ARENA(name, id) static thread_local arena name(id)
 
-#define MEM_BLOCK_START(ptr) \
-  ((char *)(((uintptr_t)(ptr)-1) & ~(BLOCK_SIZE - 1)))
-
 #ifdef __MACH__
 //
 // thread_local disabled for Apple
 //
@@ -120,16 +162,51 @@ extern thread_local bool time_for_collection;
 size_t get_gc_threshold();
 
 inline void *arena::kore_arena_alloc(size_t requested) {
-  if (block + requested > block_end) {
-    return do_alloc_slow(requested);
+  if (allocation_ptr + requested >= tripwire) {
+    //
+    // We got close to or past the last location accessed in this address range so far,
+    // depending on the requested size and tripwire setting. This triggers a garbage
+    // collection when allowed.
+    //
+    time_for_collection = true;
+    tripwire = current_addr_ptr
+               + HYPERBLOCK_SIZE; // won't trigger again until arena swap
   }
-  void *result = block;
-  block += requested;
+  void *result = allocation_ptr;
+  allocation_ptr += requested;
   MEM_LOG(
-      "Allocation at %p (size %zd), next alloc at %p (if it fits)\n", result,
-      requested, block);
+      "Allocation at %p (size %zd), next alloc at %p\n", result, requested,
+      allocation_ptr);
   return result;
 }
+
+inline void arena::arena_clear() {
+  //
+  // We set the allocation pointer to the first available address.
+  //
+  allocation_ptr = arena_start_ptr();
+  //
+  // If the number of blocks we've touched is >= threshold, we want to trigger
+  // a garbage collection if we get within 1 block of the end of this area.
+  // Otherwise we only want to trigger a garbage collection if we allocate off the
+  // end of this area.
+  //
+  tripwire = current_addr_ptr
+             + (num_blocks - (num_blocks >= get_gc_threshold())) * BLOCK_SIZE;
 }
+inline void arena::arena_swap_and_clear() {
+  update_num_blocks(); // so we save the correct number of touched blocks
+  std::swap(current_addr_ptr, collection_addr_ptr);
+  std::swap(num_blocks, num_collection_blocks);
+  allocation_semispace_id = ~allocation_semispace_id;
+  if (current_addr_ptr == nullptr) {
+    //
+    // The other semispace hasn't been initialized yet.
+ // + initialize_semispace(); + } else + arena_clear(); +} +} #endif // ARENA_H diff --git a/runtime/alloc/arena.cpp b/runtime/alloc/arena.cpp index c4384642e..9fbde8bf9 100644 --- a/runtime/alloc/arena.cpp +++ b/runtime/alloc/arena.cpp @@ -1,4 +1,3 @@ - #include #include #include @@ -12,226 +11,58 @@ extern size_t const VAR_BLOCK_SIZE = BLOCK_SIZE; -__attribute__((always_inline)) arena::memory_block_header * -arena::mem_block_header(void *ptr) { - // NOLINTNEXTLINE(*-reinterpret-cast) - return reinterpret_cast( - ((uintptr_t)(ptr)-1) & ~(BLOCK_SIZE - 1)); -} - -__attribute__((always_inline)) char -arena::get_arena_collection_semispace_id() const { - return ~allocation_semispace_id; -} - __attribute__((always_inline)) char arena::get_arena_semispace_id_of_object(void *ptr) { return mem_block_header(ptr)->semispace; } -// -// We will reserve enough address space for 1 million 1MB blocks. Might want to increase this on a > 1TB server. -// -size_t const HYPERBLOCK_SIZE = (size_t)BLOCK_SIZE * 1024 * 1024; - -static void *megabyte_malloc() { - // - // Return pointer to a BLOCK_SIZE chunk of memory with BLOCK_SIZE alignment. - // - static thread_local char *currentblock_ptr - = nullptr; // char* rather than void* to permit pointer arithmetic - if (currentblock_ptr) { - // - // We expect an page fault due to not being able to map physical memory to this block or the - // process to be killed by the OOM killer long before we run off the end of our address space. - // - currentblock_ptr += BLOCK_SIZE; - } else { - // - // First call - need to reserve the address space. - // - size_t request = HYPERBLOCK_SIZE; - void *addr = mmap( - nullptr, // let OS choose the address - request, // Linux and MacOS both allow up to 64TB - PROT_READ | PROT_WRITE, // read, write but not execute - MAP_ANONYMOUS | MAP_PRIVATE - | MAP_NORESERVE, // allocate address space only - -1, // no file backing - 0); // no offset - if (addr == MAP_FAILED) { - perror("mmap()"); - abort(); - } - // - // We ask for one block worth of address space less than we allocated so alignment will always succeed. - // We don't worry about unused address space either side of our aligned address space because there will be no - // memory mapped to it. 
- // - currentblock_ptr = reinterpret_cast( - std::align(BLOCK_SIZE, HYPERBLOCK_SIZE - BLOCK_SIZE, addr, request)); - } - return currentblock_ptr; -} - -#ifdef __MACH__ -// -// thread_local disabled for Apple -// -bool time_for_collection; -#else -thread_local bool time_for_collection; -#endif - -void arena::fresh_block() { - char *next_block = nullptr; - if (block_start == nullptr) { - next_block = (char *)megabyte_malloc(); - first_block = next_block; - auto *next_header = (arena::memory_block_header *)next_block; - next_header->next_block = nullptr; - next_header->semispace = allocation_semispace_id; - num_blocks++; - } else { - next_block = *(char **)block_start; - if (block != block_end) { - if (block_end - block == 8) { - *(uint64_t *)block = NOT_YOUNG_OBJECT_BIT; // 8 bit sentinel value - } else { - *(uint64_t *)block - = block_end - block - 8; // 16-bit or more sentinel value - } - } - if (!next_block) { - MEM_LOG( - "Allocating new block for the first time in arena %d\n", - allocation_semispace_id); - next_block = (char *)megabyte_malloc(); - *(char **)block_start = next_block; - auto *next_header = (arena::memory_block_header *)next_block; - next_header->next_block = nullptr; - next_header->semispace = allocation_semispace_id; - num_blocks++; - time_for_collection = true; - } - } - if (!*(char **)next_block && num_blocks >= get_gc_threshold()) { - time_for_collection = true; - } - block = next_block + sizeof(arena::memory_block_header); - block_start = next_block; - block_end = next_block + BLOCK_SIZE; - MEM_LOG( - "New block at %p (remaining %zd)\n", block, - BLOCK_SIZE - sizeof(arena::memory_block_header)); -} - #ifdef __MACH__ // // thread_local disabled for Apple // +bool time_for_collection = false; bool gc_enabled = true; #else +thread_local bool time_for_collection = false; thread_local bool gc_enabled = true; #endif -__attribute__((noinline)) void *arena::do_alloc_slow(size_t requested) { - MEM_LOG( - "Block at %p too small, %zd remaining but %zd needed\n", block, - block_end - block, requested); - if (requested > BLOCK_SIZE - sizeof(arena::memory_block_header)) { - return malloc(requested); - } - fresh_block(); - void *result = block; - block += requested; - MEM_LOG( - "Allocation at %p (size %zd), next alloc at %p (if it fits)\n", result, - requested, block); - return result; -} - -__attribute__((always_inline)) void * -arena::arena_resize_last_alloc(ssize_t increase) { - if (block + increase <= block_end) { - block += increase; - return block; - } - return nullptr; -} - -__attribute__((always_inline)) void arena::arena_swap_and_clear() { - char *tmp = first_block; - first_block = first_collection_block; - first_collection_block = tmp; - size_t tmp2 = num_blocks; - num_blocks = num_collection_blocks; - num_collection_blocks = tmp2; - allocation_semispace_id = ~allocation_semispace_id; - arena_clear(); -} - -__attribute__((always_inline)) void arena::arena_clear() { - block = first_block ? first_block + sizeof(arena::memory_block_header) - : nullptr; - block_start = first_block; - block_end = first_block ? first_block + BLOCK_SIZE : nullptr; -} - -__attribute__((always_inline)) char *arena::arena_start_ptr() const { - return first_block ? 
first_block + sizeof(arena::memory_block_header)
-                     : nullptr;
-}
-
-__attribute__((always_inline)) char **arena::arena_end_ptr() {
-  return &block;
-}
-
-char *arena::move_ptr(char *ptr, size_t size, char const *arena_end_ptr) {
-  char *next_ptr = ptr + size;
-  if (next_ptr == arena_end_ptr) {
-    return nullptr;
-  }
-  if (next_ptr != MEM_BLOCK_START(ptr) + BLOCK_SIZE) {
-    return next_ptr;
-  }
-  char *next_block = *(char **)MEM_BLOCK_START(ptr);
-  if (!next_block) {
-    return nullptr;
-  }
-  return next_block + sizeof(arena::memory_block_header);
-}
-
-ssize_t arena::ptr_diff(char *ptr1, char *ptr2) {
-  if (MEM_BLOCK_START(ptr1) == MEM_BLOCK_START(ptr2)) {
-    return ptr1 - ptr2;
-  }
-  arena::memory_block_header *hdr = mem_block_header(ptr2);
-  ssize_t result = 0;
-  while (hdr != mem_block_header(ptr1) && hdr->next_block) {
-    if (ptr2) {
-      result += ((char *)hdr + BLOCK_SIZE) - ptr2;
-      ptr2 = nullptr;
-    } else {
-      result += (BLOCK_SIZE - sizeof(arena::memory_block_header));
-    }
-    hdr = (arena::memory_block_header *)hdr->next_block;
+void arena::initialize_semispace() {
+  //
+  // Current semispace is uninitialized so mmap() a big chunk of address space.
+  //
+  size_t request = 2 * HYPERBLOCK_SIZE;
+  void *addr = mmap(
+      nullptr, // let OS choose the address
+      request, // Linux and MacOS both allow up to 64TB
+      PROT_READ | PROT_WRITE, // read, write but not execute
+      MAP_ANONYMOUS | MAP_PRIVATE
+          | MAP_NORESERVE, // allocate address space only
+      -1, // no file backing
+      0); // no offset
+  if (addr == MAP_FAILED) {
+    perror("mmap()");
+    abort();
   }
-  if (hdr == mem_block_header(ptr1)) {
-    result += ptr1 - (char *)(hdr + 1);
-    return result;
-  } // reached the end of the arena and didn't find the block
-  // it's possible that the result should be negative, in which
-  // case the block will have been prior to the block we started
-  // at. To handle this, we recurse with reversed arguments and
-  // negate the result. This means that the code might not
-  // terminate if the two pointers do not belong to the same
-  // arena.
-  return -ptr_diff(ptr2, ptr1);
-}
-
-size_t arena::arena_size() const {
-  return (num_blocks > num_collection_blocks ? num_blocks
-                                              : num_collection_blocks)
-         * (BLOCK_SIZE - sizeof(arena::memory_block_header));
+  //
+  // We allocated 2 * HYPERBLOCK_SIZE worth of address space but we're only going to use 1, aligned on a
+  // HYPERBLOCK_SIZE boundary. This is so we can get the start of the hyperblock by masking any address within it.
+  // We don't worry about unused address space either side of our aligned address space because there will be no
+  // memory mapped to it.
+  //
+  current_addr_ptr = reinterpret_cast<char *>(
+      std::align(HYPERBLOCK_SIZE, HYPERBLOCK_SIZE, addr, request));
+  //
+  // We put a memory_block_header at the beginning so we can identify the semispace id a pointer belongs
+  // to by masking off the low bits to access this memory_block_header.
+  //
+  auto *header = reinterpret_cast<memory_block_header *>(current_addr_ptr);
+  header->semispace = allocation_semispace_id;
+  allocation_ptr = current_addr_ptr + sizeof(arena::memory_block_header);
+  //
+  // We set the tripwire for this space so we trigger a garbage collection when we pass BLOCK_SIZE of memory
+  // allocated from this space.
+ // + tripwire = current_addr_ptr + BLOCK_SIZE; + num_blocks = 1; } diff --git a/runtime/collect/collect.cpp b/runtime/collect/collect.cpp index cc596d205..c439fdd0e 100644 --- a/runtime/collect/collect.cpp +++ b/runtime/collect/collect.cpp @@ -316,22 +316,7 @@ void kore_collect( if (collect_old || !previous_oldspace_alloc_ptr) { scan_ptr = oldspace_ptr(); } else { - if (MEM_BLOCK_START(previous_oldspace_alloc_ptr + 1) - == previous_oldspace_alloc_ptr) { - // this means that the previous oldspace allocation pointer points to an - // address that is megabyte-aligned. This can only happen if we have just - // filled up a block but have not yet allocated the next block in the - // sequence at the start of the collection cycle. This means that the - // allocation pointer is invalid and does not actually point to the next - // address that would have been allocated at, according to the logic of - // kore_arena_alloc, which will have allocated a fresh memory block and put - // the allocation at the start of it. Thus, we use arena::move_ptr with a size - // of zero to adjust and get the true address of the allocation. - scan_ptr - = arena::move_ptr(previous_oldspace_alloc_ptr, 0, *old_alloc_ptr()); - } else { - scan_ptr = previous_oldspace_alloc_ptr; - } + scan_ptr = previous_oldspace_alloc_ptr; } if (scan_ptr != *old_alloc_ptr()) { MEM_LOG("Evacuating old generation\n");
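
For reference, here is a minimal standalone sketch (not part of the patch) of the address-space reservation scheme the diff describes: reserve twice the hyperblock size with mmap() and MAP_NORESERVE so physical pages are only committed on first touch, align to a hyperblock boundary, tag the base with a semispace id, and recover that id from any interior pointer by masking. The demo_* names, sizes, and the main() driver are invented for illustration and are not from the runtime.

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <memory>
#include <sys/mman.h>

size_t const DEMO_BLOCK_SIZE = (size_t)1024 * 1024;
size_t const DEMO_HYPERBLOCK_SIZE = DEMO_BLOCK_SIZE * 1024 * 1024;

// Reserve address space only; the kernel commits physical pages lazily, on
// first write, which is the demand paging the new arena relies on.
char *demo_reserve_semispace(char semispace_id) {
  size_t request = 2 * DEMO_HYPERBLOCK_SIZE; // over-reserve so alignment cannot fail
  void *addr = mmap(
      nullptr, request, PROT_READ | PROT_WRITE,
      MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0);
  if (addr == MAP_FAILED) {
    perror("mmap()");
    abort();
  }
  // Align to a DEMO_HYPERBLOCK_SIZE boundary so the start of the semispace can
  // be recovered from any interior pointer by masking off the low bits.
  char *base = static_cast<char *>(
      std::align(DEMO_HYPERBLOCK_SIZE, DEMO_HYPERBLOCK_SIZE, addr, request));
  *base = semispace_id; // plays the role of the memory_block_header id byte
  return base;
}

// Mask an interior pointer down to the hyperblock start and read the id byte.
char demo_semispace_id_of(void *ptr) {
  auto address = reinterpret_cast<uintptr_t>(ptr);
  char *base
      = reinterpret_cast<char *>((address - 1) & ~(DEMO_HYPERBLOCK_SIZE - 1));
  return *base;
}

int main() {
  char *space = demo_reserve_semispace(1);
  char *object = space + 3 * DEMO_BLOCK_SIZE; // pretend a bump allocator returned this
  std::printf("semispace id: %d\n", demo_semispace_id_of(object));
  return 0;
}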
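
Likewise, a minimal sketch (not part of the patch) of the tripwire idea behind the new kore_arena_alloc() fast path: allocation is a plain pointer bump, and crossing the tripwire only sets time_for_collection and parks the tripwire at the end of the space so it cannot fire again until the semispaces are swapped. The demo_ names and the tiny in-struct buffer are illustrative stand-ins for the mmap()'d hyperblock and BLOCK_SIZE granularity.

#include <cstddef>
#include <cstdio>

struct demo_arena {
  static size_t const demo_block = 64;              // stand-in for BLOCK_SIZE
  static size_t const demo_space = 8 * demo_block;  // stand-in for HYPERBLOCK_SIZE

  char space[demo_space];
  char *allocation_ptr = space;
  char *tripwire = space + demo_block; // crossing this requests a collection
  bool time_for_collection = false;

  void *alloc(size_t requested) {
    if (allocation_ptr + requested >= tripwire) {
      // Near or past the high-water mark: flag a collection and push the
      // tripwire to the end of the space so it cannot fire again before the
      // semispaces are swapped and the tripwire is reset.
      time_for_collection = true;
      tripwire = space + demo_space;
    }
    void *result = allocation_ptr; // bump allocation, no per-block bookkeeping
    allocation_ptr += requested;
    return result;
  }
};

int main() {
  demo_arena a;
  for (int i = 0; i < 4; ++i) {
    a.alloc(24); // the third allocation crosses the tripwire
  }
  std::printf("collection requested: %s\n", a.time_for_collection ? "yes" : "no");
  return 0;
}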