accounts-index: allow evicting refcount=2 entries from in-memory index (wip) #3184
base: master
Changes from 2 commits
@@ -89,6 +89,10 @@ impl<T: IndexValue> PossibleEvictions<T> {
 
 // one instance of this represents one bin of the accounts index.
 pub struct InMemAccountsIndex<T: IndexValue, U: DiskIndexValue + From<T> + Into<T>> {
+    // The highest slot seen in the index. It is for evicting ref_count > 1
+    // entries (see should_evict_from_mem(...)).
+    highest_slot: AtomicU64,
+
     last_age_flushed: AtomicAge,
 
     // backing store
@@ -191,6 +195,7 @@ impl<T: IndexValue, U: DiskIndexValue + From<T> + Into<T>> InMemAccountsIndex<T,
             ),
             num_ages_to_distribute_flushes,
             startup_stats: Arc::clone(&storage.startup_stats),
+            highest_slot: AtomicU64::default(),
         }
     }
 
@@ -1008,24 +1013,47 @@ impl<T: IndexValue, U: DiskIndexValue + From<T> + Into<T>> InMemAccountsIndex<T,
         // this could be tunable dynamically based on memory pressure
         // we could look at more ages or we could throw out more items we are choosing to keep in the cache
         if Self::should_evict_based_on_age(current_age, entry, startup, ages_flushing_now) {
-            if entry.ref_count() != 1 {
-                Self::update_stat(&self.stats().held_in_mem.ref_count, 1);
-                (false, None)
-            } else {
-                // only read the slot list if we are planning to throw the item out
-                let slot_list = entry.slot_list.read().unwrap();
-                if slot_list.len() != 1 {
-                    if update_stats {
-                        Self::update_stat(&self.stats().held_in_mem.slot_list_len, 1);
-                    }
-                    (false, None) // keep 0 and > 1 slot lists in mem. They will be cleaned or shrunk soon.
-                } else {
-                    // keep items with slot lists that contained cached items
-                    let evict = !slot_list.iter().any(|(_, info)| info.is_cached());
-                    if !evict && update_stats {
-                        Self::update_stat(&self.stats().held_in_mem.slot_list_cached, 1);
-                    }
-                    (evict, if evict { Some(slot_list) } else { None })
-                }
-            }
+            match entry.ref_count() {
+                1 | 2 => {
+                    // only read the slot list if we are planning to throw the item out
+                    let slot_list = entry.slot_list.read().unwrap();
+                    let mut cached = false;
+                    let mut oldest_slot = Slot::MAX;
+                    let mut newest_slot = Slot::MIN;
+                    for (slot, info) in slot_list.iter() {
+                        cached = cached || info.is_cached();
+                        oldest_slot = oldest_slot.min(*slot);
+                        newest_slot = newest_slot.max(*slot);
+                    }
+                    let current_slot = newest_slot
+                        .max(self.highest_slot.fetch_max(newest_slot, Ordering::Relaxed));
+                    if cached {
+                        // Don't evict when entry is cached.
+                        if update_stats {
+                            Self::update_stat(&self.stats().held_in_mem.slot_list_cached, 1);
+                        }
+                        (false, None)
+                    } else {
+                        // Evict when ref_count == 1 or oldest_slot is more than one epoch old
+                        if slot_list.len() == 1
+                            || current_slot.saturating_sub(oldest_slot)
+                                > solana_sdk::epoch_schedule::DEFAULT_SLOTS_PER_EPOCH
+                        {
+                            (true, Some(slot_list))
+                        } else {
+                            if update_stats {
+                                Self::update_stat(&self.stats().held_in_mem.slot_list_len, 1);
+                            }
+                            (false, None)
+                        }
+                    }
+                }
+                _ => {
+                    // Don't evict when ref_count > 2.
+                    if update_stats {
+                        Self::update_stat(&self.stats().held_in_mem.ref_count, 1);
+                    }
+                    (false, None)
+                }
+            }
         } else {
@@ -1255,10 +1283,6 @@ impl<T: IndexValue, U: DiskIndexValue + From<T> + Into<T>> InMemAccountsIndex<T,
                     // If the entry *was* updated, re-mark it as dirty then
                     // skip to the next pubkey/entry.
                     let ref_count = v.ref_count();
-                    if ref_count != 1 || slot_list.len() != 1 {
-                        v.set_dirty(true);
-                        break;
-                    }
                     disk.try_write(
                         &k,
                         (

Inline review thread on the removed check:

If we write slot_list.len() != 1, the disk index creates the data file for the bucket.

Yes. We'd better not write slot_list.len() != 1 entries, and they shouldn't be long lived anyway.

I pushed a commit that allows single_slot_ref_2 to be stored in the index bucket.
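To make the concern in that thread concrete: the disk index can hold a single (slot, info) pair directly in its fixed-size index bucket entry, while a longer slot list forces it to allocate a separate data file. The sketch below is illustrative only and does not use the actual bucket_map types; all names are made up.

    // Illustrative only -- not the real bucket_map layout. It shows why writing a
    // slot_list of length 1 is cheap (it fits in the index bucket entry) while a
    // longer slot list forces the bucket to create a data file.
    enum DiskIndexStorage {
        // slot_list.len() == 1: the single (slot, info) pair is stored inline in
        // the fixed-size index bucket entry.
        InlineInIndexBucket { slot: u64, info: u64 },
        // longer lists spill into the bucket's data file and the index entry only
        // keeps a reference to them.
        SpilledToDataFile { offset: u64, len: u64 },
    }

    fn storage_for(slot_list: &[(u64, u64)]) -> DiskIndexStorage {
        match slot_list {
            // single-element lists never need a data file
            [(slot, info)] => DiskIndexStorage::InlineInIndexBucket { slot: *slot, info: *info },
            // anything else goes out of line (the offset here is a placeholder)
            _ => DiskIndexStorage::SpilledToDataFile { offset: 0, len: slot_list.len() as u64 },
        }
    }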
@@ -1570,35 +1594,67 @@ mod tests {
         bucket
     }
 
+    /// Test should_evict_from_mem() for different ref_counts.
+    ///
+    /// This tests that under normal conditions, where no entries are one epoch
+    /// old, only ref_count = 1 entries should be evicted.
     #[test]
     fn test_should_evict_from_mem_ref_count() {
-        for ref_count in [0, 1, 2] {
+        for ref_count in [0, 1, 2, 3] {
             let bucket = new_for_test::<u64>();
             let startup = false;
             let current_age = 0;
-            let one_element_slot_list = vec![(0, 0)];
-            let one_element_slot_list_entry = Arc::new(AccountMapEntryInner::new(
-                one_element_slot_list,
+            let slot_list: Vec<_> = (0..ref_count).map(|i| (i, i)).collect();
+            let slot_list_entry = Arc::new(AccountMapEntryInner::new(
+                slot_list,
                 ref_count,
                 AccountMapEntryMeta::default(),
             ));
 
             // exceeded budget
             assert_eq!(
                 bucket
-                    .should_evict_from_mem(
-                        current_age,
-                        &one_element_slot_list_entry,
-                        startup,
-                        false,
-                        1,
-                    )
+                    .should_evict_from_mem(current_age, &slot_list_entry, startup, false, 1,)
                     .0,
                 ref_count == 1
             );
         }
     }
 
+    /// Test should_evict_from_mem() for different ref_counts with one epoch old
+    /// entries in slot_list.
+    ///
+    /// This tests the case where entries are one epoch old; both ref_count = 1
+    /// and ref_count = 2 entries should be evicted.
+    #[test]
+    fn test_should_evict_from_mem_ref_count_one_epoch_old() {
+        for (ref_count, expected) in [(0, false), (1, true), (2, true), (3, false)] {
+            let bucket = new_for_test::<u64>();
+            let startup = false;
+            let current_age = 0;
+            let num_slots_per_epoch = solana_sdk::epoch_schedule::DEFAULT_SLOTS_PER_EPOCH;
+            let slot_list: Vec<_> = (0..ref_count)
+                .map(|i| {
+                    if i == 0 {
+                        (i, i)
+                    } else {
+                        (i + num_slots_per_epoch, i)
+                    }
+                })
+                .collect();
+            let slot_list_entry = Arc::new(AccountMapEntryInner::new(
+                slot_list,
+                ref_count,
+                AccountMapEntryMeta::default(),
+            ));
+            assert_eq!(
+                bucket
+                    .should_evict_from_mem(current_age, &slot_list_entry, startup, false, 1,)
+                    .0,
+                expected
+            );
+        }
+    }
+
     #[test]
     fn test_gather_possible_evictions() {
         solana_logger::setup();
I don't think we care whether the oldest or newest slot is older than an epoch. Also, we can't easily write a slot_list.len() == 2 entry at all. I think the ref_count == 2, slot_list.len() == 1 case is writable with 1 bit.
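A hypothetical sketch of the "1 bit" idea: if single-slot disk index entries only ever need to distinguish ref_count == 1 from ref_count == 2, one spare flag bit in an existing u64 field is enough. The bit position, names, and layout below are invented for illustration and are not the actual disk index format.

    // Hypothetical packing: the top bit of the stored u64 flags "ref_count == 2",
    // the remaining bits carry the payload (e.g. the slot). Not the real format.
    const REF_COUNT_2_BIT: u64 = 1 << 63;

    fn pack(payload: u64, ref_count: u64) -> u64 {
        debug_assert!(payload < REF_COUNT_2_BIT, "payload must leave the flag bit free");
        debug_assert!(ref_count == 1 || ref_count == 2);
        if ref_count == 2 { payload | REF_COUNT_2_BIT } else { payload }
    }

    fn unpack(packed: u64) -> (u64, u64) {
        let ref_count = if packed & REF_COUNT_2_BIT != 0 { 2 } else { 1 };
        (packed & !REF_COUNT_2_BIT, ref_count)
    }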
Age is only a u8. Even with the maximum age to flush, that is at most about 102s (255 * 400ms), which I was thinking is too frequent. After we flush these ref_count > 1 entries to disk, someone may touch them again and we bring them back into memory. That's why I added the one-epoch-old check: to make sure they are really cold (i.e. in ancient storage) before we flush.
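Putting the diff and this reasoning together, the new eviction rule can be restated roughly as below. This is a simplified paraphrase, not the actual should_evict_from_mem() signature; all parameter names are illustrative.

    // Paraphrase of the decision made in the new match on entry.ref_count().
    fn may_evict(
        ref_count: u64,
        slot_list_len: usize,
        has_cached: bool,
        oldest_slot: u64,
        current_slot: u64,
        slots_per_epoch: u64,
    ) -> bool {
        match ref_count {
            1 | 2 => {
                if has_cached {
                    // entries with cached items always stay in memory
                    false
                } else {
                    // evict single-slot entries, or entries whose oldest slot is
                    // more than one epoch behind the highest slot seen (i.e. the
                    // account data should already live in ancient storage)
                    slot_list_len == 1
                        || current_slot.saturating_sub(oldest_slot) > slots_per_epoch
                }
            }
            // ref_count == 0 or ref_count > 2: keep in memory
            _ => false,
        }
    }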
I'm confused about the connection to age here. Age is the u8 used to decide when to try to write items in a bucket to disk.

My understanding is as follows. Every 400ms, the age increases by 1. For each entry inserted, its age_to_flush is set based on whether it is cached: if not cached, current_age + 5; if cached, current_age + 255. So a ref_count > 2 entry that is not cached becomes eligible after 5 age ticks, i.e. about 2s, and will then be flushed to disk if allowed. Is that correct?
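A quick sanity check on the arithmetic in that description, assuming one age tick every ~400ms and the +5 / +255 flush offsets quoted above (taken from the comment, not verified against the code):

    // Age is a u8 tick counter; one tick ~= 400ms per the discussion above.
    const AGE_TICK_MS: u64 = 400;

    fn age_ticks_to_ms(ticks: u8) -> u64 {
        ticks as u64 * AGE_TICK_MS
    }

    fn main() {
        // not cached: eligible to flush after ~5 ticks, i.e. about 2 seconds
        assert_eq!(age_ticks_to_ms(5), 2_000);
        // cached / maximum age distance: 255 ticks, i.e. about 102 seconds
        assert_eq!(age_ticks_to_ms(255), 102_000);
    }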