From eb5ff17063fe9e912d5afeee2d9ac77031cb972a Mon Sep 17 00:00:00 2001 From: Roman Kennke Date: Wed, 11 Dec 2024 15:43:05 +0000 Subject: [PATCH 1/2] 8346011: [Lilliput] Compact Full-GC Forwarding --- src/hotspot/share/gc/g1/g1Arguments.cpp | 3 - src/hotspot/share/gc/g1/g1FullCollector.cpp | 4 + .../share/gc/parallel/parallelArguments.cpp | 3 - .../share/gc/parallel/psParallelCompact.cpp | 4 + .../share/gc/serial/serialArguments.cpp | 7 - .../share/gc/serial/serialArguments.hpp | 1 - src/hotspot/share/gc/serial/serialFullGC.cpp | 4 + .../share/gc/shared/fullGCForwarding.cpp | 174 +++++++++++++++--- .../share/gc/shared/fullGCForwarding.hpp | 165 +++++++++++++++-- .../gc/shared/fullGCForwarding.inline.hpp | 120 +++++++++--- .../gc/shenandoah/shenandoahArguments.cpp | 3 - .../share/gc/shenandoah/shenandoahFullGC.cpp | 4 + src/hotspot/share/utilities/fastHash.hpp | 102 ++++++++++ 13 files changed, 515 insertions(+), 79 deletions(-) create mode 100644 src/hotspot/share/utilities/fastHash.hpp diff --git a/src/hotspot/share/gc/g1/g1Arguments.cpp b/src/hotspot/share/gc/g1/g1Arguments.cpp index 1ae5f2ce76f..55012d96032 100644 --- a/src/hotspot/share/gc/g1/g1Arguments.cpp +++ b/src/hotspot/share/gc/g1/g1Arguments.cpp @@ -34,7 +34,6 @@ #include "gc/g1/g1HeapRegionRemSet.hpp" #include "gc/g1/g1HeapVerifier.hpp" #include "gc/shared/cardTable.hpp" -#include "gc/shared/fullGCForwarding.hpp" #include "gc/shared/gcArguments.hpp" #include "gc/shared/workerPolicy.hpp" #include "runtime/globals.hpp" @@ -244,8 +243,6 @@ void G1Arguments::initialize() { if (max_parallel_refinement_threads > UINT_MAX / divisor) { vm_exit_during_initialization("Too large parallelism for remembered sets."); } - - FullGCForwarding::initialize_flags(heap_reserved_size_bytes()); } CollectedHeap* G1Arguments::create_heap() { diff --git a/src/hotspot/share/gc/g1/g1FullCollector.cpp b/src/hotspot/share/gc/g1/g1FullCollector.cpp index 5789b44e618..5908c059ef5 100644 --- a/src/hotspot/share/gc/g1/g1FullCollector.cpp +++ b/src/hotspot/share/gc/g1/g1FullCollector.cpp @@ -212,6 +212,8 @@ void G1FullCollector::collect() { // Don't add any more derived pointers during later phases deactivate_derived_pointers(); + FullGCForwarding::begin(); + phase2_prepare_compaction(); if (has_compaction_targets()) { @@ -224,6 +226,8 @@ void G1FullCollector::collect() { log_info(gc, phases) ("No Regions selected for compaction. 
Skipping Phase 3: Adjust pointers and Phase 4: Compact heap"); } + FullGCForwarding::end(); + phase5_reset_metadata(); G1CollectedHeap::finish_codecache_marking_cycle(); diff --git a/src/hotspot/share/gc/parallel/parallelArguments.cpp b/src/hotspot/share/gc/parallel/parallelArguments.cpp index d9972ccc146..7dbd6f0fc58 100644 --- a/src/hotspot/share/gc/parallel/parallelArguments.cpp +++ b/src/hotspot/share/gc/parallel/parallelArguments.cpp @@ -27,7 +27,6 @@ #include "gc/parallel/parallelArguments.hpp" #include "gc/parallel/parallelScavengeHeap.hpp" #include "gc/shared/adaptiveSizePolicy.hpp" -#include "gc/shared/fullGCForwarding.hpp" #include "gc/shared/gcArguments.hpp" #include "gc/shared/genArguments.hpp" #include "gc/shared/workerPolicy.hpp" @@ -83,8 +82,6 @@ void ParallelArguments::initialize() { if (FLAG_IS_DEFAULT(ParallelRefProcEnabled) && ParallelGCThreads > 1) { FLAG_SET_DEFAULT(ParallelRefProcEnabled, true); } - - FullGCForwarding::initialize_flags(heap_reserved_size_bytes()); } // The alignment used for boundary between young gen and old gen diff --git a/src/hotspot/share/gc/parallel/psParallelCompact.cpp b/src/hotspot/share/gc/parallel/psParallelCompact.cpp index f4e3af4407d..3db39d1f0ca 100644 --- a/src/hotspot/share/gc/parallel/psParallelCompact.cpp +++ b/src/hotspot/share/gc/parallel/psParallelCompact.cpp @@ -1049,12 +1049,16 @@ bool PSParallelCompact::invoke_no_policy(bool clear_all_soft_refs) { DerivedPointerTable::set_active(false); #endif + FullGCForwarding::begin(); + forward_to_new_addr(); adjust_pointers(); compact(); + FullGCForwarding::end(); + ParCompactionManager::_preserved_marks_set->restore(&ParallelScavengeHeap::heap()->workers()); ParCompactionManager::verify_all_region_stack_empty(); diff --git a/src/hotspot/share/gc/serial/serialArguments.cpp b/src/hotspot/share/gc/serial/serialArguments.cpp index c9d2caf9063..c49907ab8c2 100644 --- a/src/hotspot/share/gc/serial/serialArguments.cpp +++ b/src/hotspot/share/gc/serial/serialArguments.cpp @@ -23,16 +23,9 @@ */ #include "precompiled.hpp" -#include "gc/shared/fullGCForwarding.hpp" -#include "gc/shared/gcArguments.hpp" #include "gc/serial/serialArguments.hpp" #include "gc/serial/serialHeap.hpp" -void SerialArguments::initialize() { - GCArguments::initialize(); - FullGCForwarding::initialize_flags(MaxHeapSize); -} - CollectedHeap* SerialArguments::create_heap() { return new SerialHeap(); } diff --git a/src/hotspot/share/gc/serial/serialArguments.hpp b/src/hotspot/share/gc/serial/serialArguments.hpp index 90c3225ff8d..3ed4df5f41b 100644 --- a/src/hotspot/share/gc/serial/serialArguments.hpp +++ b/src/hotspot/share/gc/serial/serialArguments.hpp @@ -31,7 +31,6 @@ class CollectedHeap; class SerialArguments : public GenArguments { private: - virtual void initialize(); virtual CollectedHeap* create_heap(); }; diff --git a/src/hotspot/share/gc/serial/serialFullGC.cpp b/src/hotspot/share/gc/serial/serialFullGC.cpp index 0df28fa7bd5..1a2544003d5 100644 --- a/src/hotspot/share/gc/serial/serialFullGC.cpp +++ b/src/hotspot/share/gc/serial/serialFullGC.cpp @@ -697,6 +697,8 @@ void SerialFullGC::invoke_at_safepoint(bool clear_all_softrefs) { phase1_mark(clear_all_softrefs); + FullGCForwarding::begin(); + Compacter compacter{gch}; { @@ -740,6 +742,8 @@ void SerialFullGC::invoke_at_safepoint(bool clear_all_softrefs) { restore_marks(); + FullGCForwarding::end(); + deallocate_stacks(); SerialFullGC::_string_dedup_requests->flush(); diff --git a/src/hotspot/share/gc/shared/fullGCForwarding.cpp 
b/src/hotspot/share/gc/shared/fullGCForwarding.cpp index 4880b08887e..864fff987d9 100644 --- a/src/hotspot/share/gc/shared/fullGCForwarding.cpp +++ b/src/hotspot/share/gc/shared/fullGCForwarding.cpp @@ -24,34 +24,166 @@ #include "precompiled.hpp" #include "gc/shared/fullGCForwarding.hpp" -#include "memory/memRegion.hpp" -#include "runtime/globals_extension.hpp" +#include "logging/log.hpp" +#include "nmt/memTag.hpp" +#include "utilities/ostream.hpp" +#include "utilities/concurrentHashTable.inline.hpp" +#include "utilities/fastHash.hpp" +#include "utilities/powerOfTwo.hpp" -HeapWord* FullGCForwarding::_heap_base = nullptr; -int FullGCForwarding::_num_low_bits = 0; +static uintx hash(HeapWord* const& addr) { + uint64_t val = reinterpret_cast(addr); + uint32_t hash = FastHash::get_hash32((uint32_t)val, (uint32_t)(val >> 32)); + return hash; +} -void FullGCForwarding::initialize_flags(size_t max_heap_size) { -#ifdef _LP64 - size_t max_narrow_heap_size = right_n_bits(NumLowBitsNarrow - Shift); - if (UseCompactObjectHeaders && max_heap_size > max_narrow_heap_size * HeapWordSize) { - warning("Compact object headers require a java heap size smaller than " SIZE_FORMAT - "%s (given: " SIZE_FORMAT "%s). Disabling compact object headers.", - byte_size_in_proper_unit(max_narrow_heap_size * HeapWordSize), - proper_unit_for_byte_size(max_narrow_heap_size * HeapWordSize), - byte_size_in_proper_unit(max_heap_size), - proper_unit_for_byte_size(max_heap_size)); - FLAG_SET_ERGO(UseCompactObjectHeaders, false); +struct ForwardingEntry { + HeapWord* _from; + HeapWord* _to; + ForwardingEntry(HeapWord* from, HeapWord* to) : _from(from), _to(to) {} +}; + +struct FallbackTableConfig { + using Value = ForwardingEntry; + static uintx get_hash(Value const& entry, bool* is_dead) { + return hash(entry._from); + } + static void* allocate_node(void* context, size_t size, Value const& value) { + return AllocateHeap(size, mtGC); + } + static void free_node(void* context, void* memory, Value const& value) { + FreeHeap(memory); + } +}; + +class FallbackTable : public ConcurrentHashTable { + +}; + +class FallbackTableLookup : public StackObj { + ForwardingEntry const _entry; +public: + explicit FallbackTableLookup(HeapWord* from) : _entry(from, nullptr) {} + uintx get_hash() const { + return hash(_entry._from); + } + bool equals(ForwardingEntry* value) { + return _entry._from == value->_from; } + bool is_dead(ForwardingEntry* value) { return false; } +}; + +// We cannot use 0, because that may already be a valid base address in zero-based heaps. 
+// 0x1 is safe because heap base addresses must be aligned by much larger alignment +HeapWord* const FullGCForwarding::UNUSED_BASE = reinterpret_cast(0x1); + +HeapWord* FullGCForwarding::_heap_start = nullptr; +size_t FullGCForwarding::_heap_start_region_bias = 0; +size_t FullGCForwarding::_num_regions = 0; +uintptr_t FullGCForwarding::_region_mask = 0; +HeapWord** FullGCForwarding::_biased_bases = nullptr; +HeapWord** FullGCForwarding::_bases_table = nullptr; +FallbackTable* FullGCForwarding::_fallback_table = nullptr; +#ifndef PRODUCT +volatile uint64_t FullGCForwarding::_num_forwardings = 0; +volatile uint64_t FullGCForwarding::_num_fallback_forwardings = 0; #endif -} void FullGCForwarding::initialize(MemRegion heap) { #ifdef _LP64 - _heap_base = heap.start(); - if (UseCompactObjectHeaders) { - _num_low_bits = NumLowBitsNarrow; - } else { - _num_low_bits = NumLowBitsWide; + _heap_start = heap.start(); + + size_t rounded_heap_size = round_up_power_of_2(heap.byte_size()); + + _num_regions = (rounded_heap_size / BytesPerWord) / BLOCK_SIZE_WORDS; + + _heap_start_region_bias = (uintptr_t)_heap_start >> BLOCK_SIZE_BYTES_SHIFT; + _region_mask = ~((uintptr_t(1) << BLOCK_SIZE_BYTES_SHIFT) - 1); + + guarantee((_heap_start_region_bias << BLOCK_SIZE_BYTES_SHIFT) == (uintptr_t)_heap_start, "must be aligned: _heap_start_region_bias: " SIZE_FORMAT ", _region_size_byte_shift: %u, _heap_start: " PTR_FORMAT, _heap_start_region_bias, BLOCK_SIZE_BYTES_SHIFT, p2i(_heap_start)); + + assert(_bases_table == nullptr, "should not be initialized yet"); + assert(_fallback_table == nullptr, "should not be initialized yet"); +#endif +} + +void FullGCForwarding::begin() { +#ifdef _LP64 + assert(_bases_table == nullptr, "should not be initialized yet"); + assert(_fallback_table == nullptr, "should not be initialized yet"); + + _fallback_table = new FallbackTable(); + +#ifndef PRODUCT + _num_forwardings = 0; + _num_fallback_forwardings = 0; +#endif + + size_t max = _num_regions; + _bases_table = NEW_C_HEAP_ARRAY(HeapWord*, max, mtGC); + HeapWord** biased_start = _bases_table - _heap_start_region_bias; + _biased_bases = biased_start; + for (size_t i = 0; i < max; i++) { + _bases_table[i] = UNUSED_BASE; } #endif } + +void FullGCForwarding::end() { +#ifndef PRODUCT + log_info(gc)("Total forwardings: " UINT64_FORMAT ", fallback forwardings: " UINT64_FORMAT + ", ratio: %f, memory used by fallback table: " SIZE_FORMAT "%s, memory used by bases table: " SIZE_FORMAT "%s", + _num_forwardings, _num_fallback_forwardings, (float)_num_forwardings/(float)_num_fallback_forwardings, + byte_size_in_proper_unit(_fallback_table->get_mem_size(Thread::current())), + proper_unit_for_byte_size(_fallback_table->get_mem_size(Thread::current())), + byte_size_in_proper_unit(sizeof(HeapWord*) * _num_regions), + proper_unit_for_byte_size(sizeof(HeapWord*) * _num_regions)); +#endif +#ifdef _LP64 + assert(_bases_table != nullptr, "should be initialized"); + FREE_C_HEAP_ARRAY(HeapWord*, _bases_table); + _bases_table = nullptr; + delete _fallback_table; + _fallback_table = nullptr; +#endif +} + +void FullGCForwarding::fallback_forward_to(HeapWord* from, HeapWord* to) { + assert(to != nullptr, "no null forwarding"); + assert(_fallback_table != nullptr, "should be initialized"); + FallbackTableLookup lookup_f(from); + ForwardingEntry entry(from, to); + auto found_f = [&](ForwardingEntry* found) { + // If dupe has been found, override it with new value. + // This is also called when new entry is succussfully inserted. 
+ if (found->_to != to) { + found->_to = to; + } + }; + Thread* current_thread = Thread::current(); + bool grow; + bool added = _fallback_table->insert_get(current_thread, lookup_f, entry, found_f, &grow); + NOT_PRODUCT(Atomic::inc(&_num_fallback_forwardings);) +#ifdef ASSERT + assert(fallback_forwardee(from) != nullptr, "must have entered forwarding"); + assert(fallback_forwardee(from) == to, "forwarding must be correct, added: %s, from: " PTR_FORMAT ", to: " PTR_FORMAT ", fwd: " PTR_FORMAT, BOOL_TO_STR(added), p2i(from), p2i(to), p2i(fallback_forwardee(from))); +#endif + if (grow) { + _fallback_table->grow(current_thread); + tty->print_cr("grow fallback table to size: " SIZE_FORMAT " bytes", + _fallback_table->get_mem_size(current_thread)); + } +} + +HeapWord* FullGCForwarding::fallback_forwardee(HeapWord* from) { + assert(_fallback_table != nullptr, "fallback table must be present"); + HeapWord* result; + FallbackTableLookup lookup_f(from); + auto found_f = [&](ForwardingEntry* found) { + result = found->_to; + }; + bool found = _fallback_table->get(Thread::current(), lookup_f, found_f); + assert(found, "something must have been found"); + assert(result != nullptr, "must have found forwarding"); + return result; +} diff --git a/src/hotspot/share/gc/shared/fullGCForwarding.hpp b/src/hotspot/share/gc/shared/fullGCForwarding.hpp index a6ca182428b..e9be98db5a0 100644 --- a/src/hotspot/share/gc/shared/fullGCForwarding.hpp +++ b/src/hotspot/share/gc/shared/fullGCForwarding.hpp @@ -25,35 +25,164 @@ #ifndef SHARE_GC_SHARED_FULLGCFORWARDING_HPP #define SHARE_GC_SHARED_FULLGCFORWARDING_HPP -#include "memory/allStatic.hpp" +#include "memory/allocation.hpp" #include "memory/memRegion.hpp" #include "oops/markWord.hpp" #include "oops/oopsHierarchy.hpp" -/* - * Implements forwarding for the Full GCs of Serial, Parallel, G1 and Shenandoah in - * a way that preserves upper N bits of object mark-words, which contain crucial - * Klass* information when running with compact headers. The encoding is similar to - * compressed-oops encoding: it basically subtracts the forwardee address from the - * heap-base, shifts that difference into the right place, and sets the lowest two - * bits (to indicate 'forwarded' state as usual). - * With compact-headers, we have 40 bits to encode forwarding pointers. This is - * enough to address 8TB of heap. If the heap size exceeds that limit, we turn off - * compact headers. +class FallbackTable; +class Mutex; + +/** + * FullGCForwarding is a method to store forwarding information in a compressed form into the object header, + * that has been specifically designed for sliding compacting GCs and compact object headers. With compact object + * headers, we store the compressed class pointer in the header, which would be overwritten by full forwarding + * pointers, if we allow the legacy forwarding code to act. This would lose the class information for the object, + * which is required later in GC cycle to iterate the reference fields and get the object size for copying. + * + * FullGCForwarding requires only small side tables and guarantees constant-time access and modification. + * + * The key advantage of sliding compaction for encoding efficiency: + * - It forwards objects linearily, starting at the heap bottom and moving up to the top, sliding + * live objects towards the bottom of the heap. (The reality in parallel or regionalized GCs is a bit more + * complex, but conceptually it is the same.) 
+ * - Objects starting in any one block can only be forwarded to a memory region that is not larger than + * a block. (There are exceptions to this rule which are discussed below.) + * + * This is an intuitive property: when we slide the compact block full of data, it can not take up more + * memory afterwards. + * This property allows us to use a side table to record the addresses of the target memory region for + * each block. The table holds N entries for N blocks. For each block, it gives the base + * address of the target regions, or a special placeholder if not used. + * + * This encoding efficiency allows to store the forwarding information in the object header _together_ with the + * compressed class pointer. + * + * The idea is to use a pointer compression scheme very similar to the one that is used for compressed oops. + * We divide the heap into number of equal-sized blocks. Each block spans a maximum of 2^NUM_OFFSET_BITS words. + * We maintain a side-table of target-base-addresses, with one address entry per block. + * + * When recording the sliding forwarding, the mark word would look roughly like this: + * + * 32 0 + * [.....................OOOOOOOOOTT] + * ^------ tag-bits, indicates 'forwarded' + * ^-------- in-region offset + * ^----------------- protected area, *not touched* by this code, useful for + * compressed class pointer with compact object headers + * + * Adding a forwarding then generally works as follows: + * 1. Compute the index of the block of the "from" address. + * 2. Load the target-base-offset of the from-block from the side-table. + * 3. If the base-offset is not-yet set, set it to the to-address of the forwarding. + * (In other words, the first forwarding of a block determines the target base-offset.) + * 4. Compute the offset of the to-address in the target region. + * 4. Store offset in the object header. + * + * Similarly, looking up the target address, given an original object address generally works as follows: + * 1. Compute the index of the block of the "from" address. + * 2. Load the target-base-offset of the from-block from the side-table. + * 3. Extract the offset from the object header. + * 4. Compute the "to" address from "to" region base and "offset" + * + * We reserve one special value for the offset: + * - 111111111: Indicates an exceptional forwarding (see below), for which a fallback hash-table + * is used to look up the target address. + * + * In order to support this, we need to make a change to the above algorithm: + * - Forwardings that would use offsets >= 111111111 (i.e. the last slot) + * would also need to use the fallback-table. We expect that to be relatively rare for two reasons: + * 1. It only affects 1 out of 512 possible offsets, in other words, 1/512th of all situations in an equal + * distribution. + * 2. Forwardings are not equally-distributed, because normally we 'skip' unreachable objects, + * thus compacting the block. Forwardings tend to cluster at the beginning of the target region, + * and become less likely towards the end of the possible encodable target address range. + * Which means in reality it will be much less frequent than 1/512. + * + * There are several conditions when the above algorithm would be broken because the assumption that + * 'objects from each block can only get forwarded to a region of block-size' is violated: + * - G1 last-ditch serial compaction: there, object from a single region can be forwarded to multiple, + * more than two regions. 
G1 serial compaction is not very common - it is the last-last-ditch GC + * that is used when the JVM is scrambling to squeeze more space out of the heap, and at that point, + * ultimate performance is no longer the main concern. + * - When forwarding hits a space (or G1/Shenandoah region) boundary, then latter objects of a block + * need to be forwarded to a different address range than earlier objects in the same block. + * This is rare. + * - With compact identity hash-code, objects can grow, and in the worst case use up more memory in + * the target block than we can address. We expect that to be rare. + * + * To deal with that, we initialize a fallback-hashtable for storing those extra forwardings, and use a special + * offset pattern (0b11...1) to indicate that the forwardee is not encoded but should be looked-up in the hashtable. + * This implies that this particular offset (the last word of a block) can not be used directly as forwarding, + * but also has to be handled by the fallback-table. */ class FullGCForwarding : public AllStatic { - static const int NumLowBitsNarrow = LP64_ONLY(markWord::klass_shift) NOT_LP64(0 /*unused*/); - static const int NumLowBitsWide = BitsPerWord; - static const int Shift = markWord::lock_bits + markWord::lock_shift; +private: + static const int AVAILABLE_LOW_BITS = 11; + static const int AVAILABLE_BITS_MASK = right_n_bits(AVAILABLE_LOW_BITS); + // The offset bits start after the lock-bits, which are currently used by Serial GC + // for marking objects. Could be 1 for Serial GC when being clever with the bits, + // and 0 for all other GCs. + static const int OFFSET_BITS_SHIFT = markWord::lock_shift + markWord::lock_bits; + + // How many bits we use for the offset + static const int NUM_OFFSET_BITS = AVAILABLE_LOW_BITS - OFFSET_BITS_SHIFT; + static const size_t BLOCK_SIZE_WORDS = 1 << NUM_OFFSET_BITS; + static const int BLOCK_SIZE_BYTES_SHIFT = NUM_OFFSET_BITS + LogHeapWordSize; + static const size_t MAX_OFFSET = BLOCK_SIZE_WORDS - 2; + static const uintptr_t OFFSET_MASK = right_n_bits(NUM_OFFSET_BITS) << OFFSET_BITS_SHIFT; + + // This offset bit-pattern indicates that the actual mapping is handled by the + // fallback-table. This also implies that this cannot be used as a valid offset, + // and we must also use the fallback-table for mappings to the last word of a + // block. + static const uintptr_t FALLBACK_PATTERN = right_n_bits(NUM_OFFSET_BITS); + static const uintptr_t FALLBACK_PATTERN_IN_PLACE = FALLBACK_PATTERN << OFFSET_BITS_SHIFT; + + // Indicates an unused base address in the target base table. + static HeapWord* const UNUSED_BASE; + + static HeapWord* _heap_start; + + static size_t _heap_start_region_bias; + static size_t _num_regions; + static uintptr_t _region_mask; + + // The target base table memory. + static HeapWord** _bases_table; + // Entries into the target base tables, biased to the start of the heap. 
+ static HeapWord** _biased_bases; + + static FallbackTable* _fallback_table; + +#ifndef PRODUCT + static volatile uint64_t _num_forwardings; + static volatile uint64_t _num_fallback_forwardings; +#endif + + static inline size_t biased_region_index_containing(HeapWord* addr); + + static inline bool is_fallback(uintptr_t encoded); + static inline uintptr_t encode_forwarding(HeapWord* from, HeapWord* to); + static inline HeapWord* decode_forwarding(HeapWord* from, uintptr_t encoded); + + static void fallback_forward_to(HeapWord* from, HeapWord* to); + static HeapWord* fallback_forwardee(HeapWord* from); + + static inline void forward_to_impl(oop from, oop to); + static inline oop forwardee_impl(oop from); - static HeapWord* _heap_base; - static int _num_low_bits; public: - static void initialize_flags(size_t max_heap_size); static void initialize(MemRegion heap); + + static void begin(); + static void end(); + + static inline bool is_forwarded(oop obj); + static inline bool is_not_forwarded(oop obj); + static inline void forward_to(oop from, oop to); static inline oop forwardee(oop from); - static inline bool is_forwarded(oop obj); }; #endif // SHARE_GC_SHARED_FULLGCFORWARDING_HPP diff --git a/src/hotspot/share/gc/shared/fullGCForwarding.inline.hpp b/src/hotspot/share/gc/shared/fullGCForwarding.inline.hpp index ebd280a454f..2245f30f020 100644 --- a/src/hotspot/share/gc/shared/fullGCForwarding.inline.hpp +++ b/src/hotspot/share/gc/shared/fullGCForwarding.inline.hpp @@ -19,42 +19,116 @@ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. - * */ -#ifndef GC_SHARED_FULLGCFORWARDING_INLINE_HPP -#define GC_SHARED_FULLGCFORWARDING_INLINE_HPP +#ifndef SHARE_GC_SHARED_FULLGCFORWARDING_INLINE_HPP +#define SHARE_GC_SHARED_FULLGCFORWARDING_INLINE_HPP +#include "gc/shared/gc_globals.hpp" #include "gc/shared/fullGCForwarding.hpp" - +#include "oops/markWord.hpp" #include "oops/oop.inline.hpp" -#include "utilities/globalDefinitions.hpp" +#include "utilities/macros.hpp" + +inline bool FullGCForwarding::is_forwarded(oop obj) { + return obj->is_forwarded(); +} + +size_t FullGCForwarding::biased_region_index_containing(HeapWord* addr) { + return (uintptr_t)addr >> BLOCK_SIZE_BYTES_SHIFT; +} + +bool FullGCForwarding::is_fallback(uintptr_t encoded) { + return (encoded & OFFSET_MASK) == FALLBACK_PATTERN_IN_PLACE; +} + +uintptr_t FullGCForwarding::encode_forwarding(HeapWord* from, HeapWord* to) { + size_t from_block_idx = biased_region_index_containing(from); + + HeapWord* to_region_base = _biased_bases[from_block_idx]; + if (to_region_base == UNUSED_BASE) { + _biased_bases[from_block_idx] = to_region_base = to; + } + + // Avoid pointer_delta() on purpose: using an unsigned subtraction, + // we get an underflow when to < to_region_base, which means + // we can use a single comparison instead of: + // if (to_region_base > to || (to - to_region_base) > MAX_OFFSET) { .. 
} + size_t offset = size_t(to - to_region_base); + if (offset > MAX_OFFSET) { + offset = FALLBACK_PATTERN; + } + uintptr_t encoded = (offset << OFFSET_BITS_SHIFT) | markWord::marked_value; + + assert(is_fallback(encoded) || to == decode_forwarding(from, encoded), "must be reversible"); + assert((encoded & ~AVAILABLE_BITS_MASK) == 0, "must encode to available bits"); + return encoded; +} -void FullGCForwarding::forward_to(oop from, oop to) { +HeapWord* FullGCForwarding::decode_forwarding(HeapWord* from, uintptr_t encoded) { + assert(!is_fallback(encoded), "must not be fallback-forwarded, encoded: " INTPTR_FORMAT ", OFFSET_MASK: " INTPTR_FORMAT ", FALLBACK_PATTERN_IN_PLACE: " INTPTR_FORMAT, encoded, OFFSET_MASK, FALLBACK_PATTERN_IN_PLACE); + assert((encoded & ~AVAILABLE_BITS_MASK) == 0, "must decode from available bits, encoded: " INTPTR_FORMAT, encoded); + uintptr_t offset = (encoded >> OFFSET_BITS_SHIFT); + + size_t from_idx = biased_region_index_containing(from); + HeapWord* base = _biased_bases[from_idx]; + assert(base != UNUSED_BASE, "must not be unused base: encoded: " INTPTR_FORMAT, encoded); + HeapWord* decoded = base + offset; + assert(decoded >= _heap_start, + "Address must be above heap start. encoded: " INTPTR_FORMAT ", base: " PTR_FORMAT, + encoded, p2i(base)); + + return decoded; +} + +inline void FullGCForwarding::forward_to_impl(oop from, oop to) { + assert(_bases_table != nullptr, "call begin() before forwarding"); + + markWord from_header = from->mark(); + HeapWord* from_hw = cast_from_oop(from); + HeapWord* to_hw = cast_from_oop(to); + uintptr_t encoded = encode_forwarding(from_hw, to_hw); + markWord new_header = markWord((from_header.value() & ~OFFSET_MASK) | encoded); + from->set_mark(new_header); + + if (is_fallback(encoded)) { + fallback_forward_to(from_hw, to_hw); + } + NOT_PRODUCT(Atomic::inc(&_num_forwardings);) +} + +inline void FullGCForwarding::forward_to(oop obj, oop fwd) { + assert(fwd != nullptr, "no null forwarding"); #ifdef _LP64 - uintptr_t encoded = pointer_delta(cast_from_oop(to), _heap_base) << Shift; - assert(encoded <= static_cast(right_n_bits(_num_low_bits)), "encoded forwardee must fit"); - uintptr_t mark = from->mark().value(); - mark &= ~right_n_bits(_num_low_bits); - mark |= (encoded | markWord::marked_value); - from->set_mark(markWord(mark)); + assert(_bases_table != nullptr, "expect sliding forwarding initialized"); + forward_to_impl(obj, fwd); + assert(forwardee(obj) == fwd, "must be forwarded to correct forwardee, obj: " PTR_FORMAT ", forwardee(obj): " PTR_FORMAT ", fwd: " PTR_FORMAT ", mark: " INTPTR_FORMAT, p2i(obj), p2i(forwardee(obj)), p2i(fwd), obj->mark().value()); #else - from->forward_to(to); + obj->forward_to(fwd); #endif } -oop FullGCForwarding::forwardee(oop from) { +inline oop FullGCForwarding::forwardee_impl(oop from) { + assert(_bases_table != nullptr, "call begin() before asking for forwarding"); + + markWord header = from->mark(); + HeapWord* from_hw = cast_from_oop(from); + if (is_fallback(header.value())) { + HeapWord* to = fallback_forwardee(from_hw); + return cast_to_oop(to); + } + uintptr_t encoded = header.value() & OFFSET_MASK; + HeapWord* to = decode_forwarding(from_hw, encoded); + return cast_to_oop(to); +} + +inline oop FullGCForwarding::forwardee(oop obj) { #ifdef _LP64 - uintptr_t mark = from->mark().value(); - HeapWord* decoded = _heap_base + ((mark & right_n_bits(_num_low_bits)) >> Shift); - return cast_to_oop(decoded); + assert(_bases_table != nullptr, "expect sliding forwarding initialized"); + return 
forwardee_impl(obj); #else - return from->forwardee(); + return obj->forwardee(); #endif } -bool FullGCForwarding::is_forwarded(oop obj) { - return obj->mark().is_forwarded(); -} - -#endif // GC_SHARED_FULLGCFORWARDING_INLINE_HPP +#endif // SHARE_GC_SHARED_FULLGCFORWARDING_INLINE_HPP diff --git a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp index fa3f9019af4..b95a91ede12 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp @@ -24,7 +24,6 @@ */ #include "precompiled.hpp" -#include "gc/shared/fullGCForwarding.hpp" #include "gc/shared/gcArguments.hpp" #include "gc/shared/tlab_globals.hpp" #include "gc/shared/workerPolicy.hpp" @@ -186,8 +185,6 @@ void ShenandoahArguments::initialize() { if (FLAG_IS_DEFAULT(TLABAllocationWeight)) { FLAG_SET_DEFAULT(TLABAllocationWeight, 90); } - - FullGCForwarding::initialize_flags(MaxHeapSize); } size_t ShenandoahArguments::conservative_max_heap_alignment() { diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFullGC.cpp b/src/hotspot/share/gc/shenandoah/shenandoahFullGC.cpp index 3e880271529..8aee12923eb 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahFullGC.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahFullGC.cpp @@ -245,6 +245,8 @@ void ShenandoahFullGC::do_it(GCCause::Cause gc_cause) { // until all phases run together. ShenandoahHeapLocker lock(heap->lock()); + FullGCForwarding::begin(); + phase2_calculate_target_addresses(worker_slices); OrderAccess::fence(); @@ -254,6 +256,8 @@ void ShenandoahFullGC::do_it(GCCause::Cause gc_cause) { phase4_compact_objects(worker_slices); phase5_epilog(); + + FullGCForwarding::end(); } // Resize metaspace diff --git a/src/hotspot/share/utilities/fastHash.hpp b/src/hotspot/share/utilities/fastHash.hpp new file mode 100644 index 00000000000..8179df5795d --- /dev/null +++ b/src/hotspot/share/utilities/fastHash.hpp @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_UTILITIES_FASTHASH_HPP +#define SHARE_UTILITIES_FASTHASH_HPP + +#include "memory/allStatic.hpp" + +class FastHash : public AllStatic { +private: + static void fullmul64(uint64_t& hi, uint64_t& lo, uint64_t op1, uint64_t op2) { +#if defined(__SIZEOF_INT128__) + __uint128_t prod = static_cast<__uint128_t>(op1) * static_cast<__uint128_t>(op2); + hi = static_cast(prod >> 64); + lo = static_cast(prod >> 0); +#else + /* First calculate all of the cross products. 
*/ + uint64_t lo_lo = (op1 & 0xFFFFFFFF) * (op2 & 0xFFFFFFFF); + uint64_t hi_lo = (op1 >> 32) * (op2 & 0xFFFFFFFF); + uint64_t lo_hi = (op1 & 0xFFFFFFFF) * (op2 >> 32); + uint64_t hi_hi = (op1 >> 32) * (op2 >> 32); + + /* Now add the products together. These will never overflow. */ + uint64_t cross = (lo_lo >> 32) + (hi_lo & 0xFFFFFFFF) + lo_hi; + uint64_t upper = (hi_lo >> 32) + (cross >> 32) + hi_hi; + hi = upper; + lo = (cross << 32) | (lo_lo & 0xFFFFFFFF); +#endif + } + + static void fullmul32(uint32_t& hi, uint32_t& lo, uint32_t op1, uint32_t op2) { + uint64_t x64 = op1, y64 = op2, xy64 = x64 * y64; + hi = (uint32_t)(xy64 >> 32); + lo = (uint32_t)(xy64 >> 0); + } + + static uint64_t ror64(uint64_t x, uint64_t distance) { + distance = distance & (64 - 1); + return (x >> distance) | (x << (64 - distance)); + } + + static uint32_t ror32(uint32_t x, uint32_t distance) { + distance = distance & (32 - 1); + return (x >> distance) | (x << (32 - distance)); + } + +public: + static uint64_t get_hash64(uint64_t x, uint64_t y) { + const uint64_t M = 0x8ADAE89C337954D5; + const uint64_t A = 0xAAAAAAAAAAAAAAAA; // REPAA + const uint64_t H0 = (x ^ y), L0 = (x ^ A); + + uint64_t U0, V0; fullmul64(U0, V0, L0, M); + const uint64_t Q0 = (H0 * M); + const uint64_t L1 = (Q0 ^ U0); + + uint64_t U1, V1; fullmul64(U1, V1, L1, M); + const uint64_t P1 = (V0 ^ M); + const uint64_t Q1 = ror64(P1, L1); + const uint64_t L2 = (Q1 ^ U1); + return V1 ^ L2; + } + + static uint32_t get_hash32(uint32_t x, uint32_t y) { + const uint32_t M = 0x337954D5; + const uint32_t A = 0xAAAAAAAA; // REPAA + const uint32_t H0 = (x ^ y), L0 = (x ^ A); + + uint32_t U0, V0; fullmul32(U0, V0, L0, M); + const uint32_t Q0 = (H0 * M); + const uint32_t L1 = (Q0 ^ U0); + + uint32_t U1, V1; fullmul32(U1, V1, L1, M); + const uint32_t P1 = (V0 ^ M); + const uint32_t Q1 = ror32(P1, L1); + const uint32_t L2 = (Q1 ^ U1); + return V1 ^ L2; + } +}; + +#endif// SHARE_UTILITIES_FASTHASH_HPP From 42ca1c80df1845e6ff86e7955a0949428191e358 Mon Sep 17 00:00:00 2001 From: Roman Kennke Date: Thu, 12 Dec 2024 14:51:53 +0000 Subject: [PATCH 2/2] Fix preservedMarks gtest --- src/hotspot/share/gc/shared/fullGCForwarding.cpp | 2 -- test/hotspot/gtest/gc/shared/test_preservedMarks.cpp | 4 ++++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/hotspot/share/gc/shared/fullGCForwarding.cpp b/src/hotspot/share/gc/shared/fullGCForwarding.cpp index 864fff987d9..f992ff82ec1 100644 --- a/src/hotspot/share/gc/shared/fullGCForwarding.cpp +++ b/src/hotspot/share/gc/shared/fullGCForwarding.cpp @@ -100,8 +100,6 @@ void FullGCForwarding::initialize(MemRegion heap) { _heap_start_region_bias = (uintptr_t)_heap_start >> BLOCK_SIZE_BYTES_SHIFT; _region_mask = ~((uintptr_t(1) << BLOCK_SIZE_BYTES_SHIFT) - 1); - guarantee((_heap_start_region_bias << BLOCK_SIZE_BYTES_SHIFT) == (uintptr_t)_heap_start, "must be aligned: _heap_start_region_bias: " SIZE_FORMAT ", _region_size_byte_shift: %u, _heap_start: " PTR_FORMAT, _heap_start_region_bias, BLOCK_SIZE_BYTES_SHIFT, p2i(_heap_start)); - assert(_bases_table == nullptr, "should not be initialized yet"); assert(_fallback_table == nullptr, "should not be initialized yet"); #endif diff --git a/test/hotspot/gtest/gc/shared/test_preservedMarks.cpp b/test/hotspot/gtest/gc/shared/test_preservedMarks.cpp index 0b438572b2d..29352ebdaa4 100644 --- a/test/hotspot/gtest/gc/shared/test_preservedMarks.cpp +++ b/test/hotspot/gtest/gc/shared/test_preservedMarks.cpp @@ -56,6 +56,8 @@ TEST_VM(PreservedMarks, iterate_and_restore) { 
   ASSERT_MARK_WORD_EQ(o1->mark(), changedMark());
   ASSERT_MARK_WORD_EQ(o2->mark(), changedMark());
 
+  FullGCForwarding::begin();
+
   // Push o1 and o2 to have their marks preserved.
   pm.push_if_necessary(o1, o1->mark());
   pm.push_if_necessary(o2, o2->mark());
@@ -74,4 +76,6 @@ TEST_VM(PreservedMarks, iterate_and_restore) {
   pm.restore();
   ASSERT_MARK_WORD_EQ(o3->mark(), changedMark());
   ASSERT_MARK_WORD_EQ(o4->mark(), changedMark());
+
+  FullGCForwarding::end();
 }
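
The block/offset scheme described in the fullGCForwarding.hpp comment and implemented in fullGCForwarding.inline.hpp can be made concrete with a small, self-contained sketch. Everything below (the sketch namespace, the Forwarding struct, the k* constant names) is illustrative naming of my own; only the values mirror the patch (2 tag bits, 9 offset bits, 512-word blocks, one reserved offset pattern), and addresses are modelled as plain word indexes from the heap start instead of biased HeapWord* values, so this is a sketch of the idea, not the HotSpot code.

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <unordered_map>
#include <vector>

namespace sketch {

constexpr int       kTagBits    = 2;                            // lock_shift + lock_bits in the patch
constexpr int       kOffsetBits = 11 - kTagBits;                // AVAILABLE_LOW_BITS minus the tag bits = 9
constexpr uintptr_t kBlockWords = uintptr_t(1) << kOffsetBits;  // 512 words per block
constexpr uintptr_t kMaxOffset  = kBlockWords - 2;              // largest directly encodable offset
constexpr uintptr_t kFallback   = kBlockWords - 1;              // 0b111111111, "look in the fallback table"
constexpr uintptr_t kMarked     = 0x3;                          // 'forwarded' tag bits
constexpr uintptr_t kUnusedBase = ~uintptr_t(0);                // sentinel; the patch uses 0x1 for the same job

// Addresses are plain word indexes from the heap start, so the block index is
// simply from / kBlockWords (the real code biases raw HeapWord* addresses instead).
struct Forwarding {
  std::vector<uintptr_t> bases;                       // target base per source block
  std::unordered_map<uintptr_t, uintptr_t> fallback;  // from -> to, for offsets that do not fit

  explicit Forwarding(size_t num_blocks) : bases(num_blocks, kUnusedBase) {}

  uintptr_t encode(uintptr_t from, uintptr_t to) {
    size_t block = from / kBlockWords;
    if (bases[block] == kUnusedBase) {
      bases[block] = to;                  // the first forwarding of a block fixes its target base
    }
    uintptr_t offset = to - bases[block]; // unsigned: to < base underflows and also exceeds kMaxOffset
    if (offset > kMaxOffset) {
      fallback[from] = to;                // rare: remember the mapping in the side hash table
      offset = kFallback;
    }
    return (offset << kTagBits) | kMarked;  // the low 11 bits that would go into the mark word
  }

  uintptr_t decode(uintptr_t from, uintptr_t encoded) const {
    uintptr_t offset = encoded >> kTagBits;
    if (offset == kFallback) {
      return fallback.at(from);
    }
    return bases[from / kBlockWords] + offset;
  }
};

} // namespace sketch

int main() {
  sketch::Forwarding fwd(16);              // a toy heap of 16 blocks
  uintptr_t e1 = fwd.encode(100, 0);       // block 0 slides to the very bottom
  uintptr_t e2 = fwd.encode(140, 24);      // same block, small offset from the shared base
  uintptr_t e3 = fwd.encode(600, 10);      // block 1 slides below its own start, still one base
  uintptr_t e4 = fwd.encode(610, 9);       // to < base for this block: handled by the fallback map
  assert(fwd.decode(100, e1) == 0);
  assert(fwd.decode(140, e2) == 24);
  assert(fwd.decode(600, e3) == 10);
  assert((e4 >> sketch::kTagBits) == sketch::kFallback);
  assert(fwd.decode(610, e4) == 9);
  std::printf("encodings: 0x%lx 0x%lx 0x%lx 0x%lx\n",
              (unsigned long)e1, (unsigned long)e2, (unsigned long)e3, (unsigned long)e4);
  return 0;
}

Because a compacted block can never grow, to - base stays within one block for almost every object; the fallback map only has to catch the boundary, G1 serial-compaction and identity-hash growth cases called out in the header comment.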
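
As a quick cross-check of the constants declared in FullGCForwarding (AVAILABLE_LOW_BITS, OFFSET_BITS_SHIFT, NUM_OFFSET_BITS, OFFSET_MASK, FALLBACK_PATTERN), the following compiles stand-alone and verifies the derived values; the lock_shift = 0, lock_bits = 2 and 8-byte heap-word inputs are assumptions about the current 64-bit markWord layout, not something this patch defines.

#include <cstdint>
#include <cstdio>

// Assumed inputs (not defined by this patch).
constexpr int LOCK_SHIFT = 0;
constexpr int LOCK_BITS  = 2;
constexpr int LOG_HEAP_WORD_SIZE = 3;   // 64-bit heap words

// Derived exactly as in fullGCForwarding.hpp.
constexpr int       AVAILABLE_LOW_BITS        = 11;
constexpr int       OFFSET_BITS_SHIFT         = LOCK_SHIFT + LOCK_BITS;
constexpr int       NUM_OFFSET_BITS           = AVAILABLE_LOW_BITS - OFFSET_BITS_SHIFT;
constexpr uintptr_t BLOCK_SIZE_WORDS          = uintptr_t(1) << NUM_OFFSET_BITS;
constexpr int       BLOCK_SIZE_BYTES_SHIFT    = NUM_OFFSET_BITS + LOG_HEAP_WORD_SIZE;
constexpr uintptr_t OFFSET_MASK               = ((uintptr_t(1) << NUM_OFFSET_BITS) - 1) << OFFSET_BITS_SHIFT;
constexpr uintptr_t FALLBACK_PATTERN          = (uintptr_t(1) << NUM_OFFSET_BITS) - 1;
constexpr uintptr_t FALLBACK_PATTERN_IN_PLACE = FALLBACK_PATTERN << OFFSET_BITS_SHIFT;

static_assert(NUM_OFFSET_BITS == 9,         "nine offset bits");
static_assert(BLOCK_SIZE_WORDS == 512,      "512-word (4 KiB) blocks");
static_assert(BLOCK_SIZE_BYTES_SHIFT == 12, "block index = address >> 12");
static_assert(OFFSET_MASK == 0x7fc,         "offset occupies mark-word bits 2..10");
static_assert(FALLBACK_PATTERN_IN_PLACE == 0x7fc, "all-ones offset is the fallback marker");

int main() {
  // Bits 11 and up are never touched, which is what protects the compressed
  // class pointer kept in the upper mark-word bits with compact object headers.
  std::printf("offset mask: 0x%lx, block: %lu words\n",
              (unsigned long)OFFSET_MASK, (unsigned long)BLOCK_SIZE_WORDS);
  return 0;
}

Since MAX_OFFSET is BLOCK_SIZE_WORDS - 2, the all-ones offset 511 is never produced for a real in-block target and stays free to mean "consult the fallback table".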
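
The fallback table keys its buckets with the FastHash mixer added in fastHash.hpp: the hash() helper at the top of fullGCForwarding.cpp splits the 64-bit "from" address into two 32-bit halves and feeds them to FastHash::get_hash32. The stand-alone copy below transcribes that 32-bit path (fullmul32 inlined as a 64-bit multiply; the rotate is written with an extra "& 31" of my own so a zero distance cannot shift by 32); treat it as an illustration of the mixing, not a replacement for the shared header.

#include <cstdint>
#include <cstdio>

static uint32_t ror32(uint32_t x, uint32_t distance) {
  distance &= 31;
  return (x >> distance) | (x << ((32 - distance) & 31));  // '& 31' keeps a zero rotate well-defined
}

static uint32_t get_hash32(uint32_t x, uint32_t y) {
  const uint32_t M  = 0x337954D5;
  const uint32_t A  = 0xAAAAAAAA;                          // REPAA
  const uint32_t H0 = x ^ y, L0 = x ^ A;

  const uint64_t p0 = (uint64_t)L0 * M;                    // fullmul32: high and low product halves
  const uint32_t U0 = (uint32_t)(p0 >> 32), V0 = (uint32_t)p0;
  const uint32_t Q0 = H0 * M;
  const uint32_t L1 = Q0 ^ U0;

  const uint64_t p1 = (uint64_t)L1 * M;
  const uint32_t U1 = (uint32_t)(p1 >> 32), V1 = (uint32_t)p1;
  const uint32_t P1 = V0 ^ M;
  const uint32_t Q1 = ror32(P1, L1);
  const uint32_t L2 = Q1 ^ U1;
  return V1 ^ L2;
}

int main() {
  int probe = 0;                                           // any object, stand-in for a HeapWord* "from"
  uint64_t val = (uint64_t)(uintptr_t)&probe;
  uint32_t h = get_hash32((uint32_t)val, (uint32_t)(val >> 32));
  std::printf("bucket hash for %p: 0x%08x\n", (void*)&probe, h);
  return 0;
}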