Skip to content

Commit

Permalink
perf: fine-tuned hash function for more hash maps
Browse files Browse the repository at this point in the history
  • Loading branch information
hai-rise committed Aug 23, 2024
1 parent f0bdb21 commit cce4fc9
Show file tree
Hide file tree
Showing 16 changed files with 82 additions and 100 deletions.
9 changes: 4 additions & 5 deletions benches/gigagas.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,11 @@

use std::{num::NonZeroUsize, thread};

use ahash::AHashMap;
use alloy_primitives::{Address, U160, U256};
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use pevm::{
chain::PevmEthereum, execute_revm_parallel, execute_revm_sequential, Bytecodes, EvmAccount,
InMemoryStorage,
chain::PevmEthereum, execute_revm_parallel, execute_revm_sequential, Bytecodes, ChainState,
EvmAccount, InMemoryStorage,
};
use revm::primitives::{BlockEnv, SpecId, TransactTo, TxEnv};

Expand Down Expand Up @@ -98,8 +97,8 @@ pub fn bench_erc20(c: &mut Criterion) {

pub fn bench_uniswap(c: &mut Criterion) {
let block_size = (GIGA_GAS as f64 / uniswap::GAS_LIMIT as f64).ceil() as usize;
let mut final_state = AHashMap::from([(Address::ZERO, EvmAccount::default())]); // Beneficiary
let mut final_bytecodes = Bytecodes::new();
let mut final_state = ChainState::from_iter([(Address::ZERO, EvmAccount::default())]); // Beneficiary
let mut final_bytecodes = Bytecodes::default();
let mut final_txs = Vec::<TxEnv>::new();
for _ in 0..block_size {
let (state, bytecodes, txs) = uniswap::generate_cluster(1, 1);
Expand Down
13 changes: 3 additions & 10 deletions src/chain.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
//! Chain specific utils

use std::{collections::HashMap, fmt::Debug};
use std::fmt::Debug;

use alloy_primitives::{B256, U256};
use alloy_rpc_types::{BlockTransactions, Header, Transaction};
Expand All @@ -9,10 +9,7 @@ use revm::{
Handler,
};

use crate::{
mv_memory::{LazyAddresses, MvMemory},
BuildIdentityHasher, PevmTxExecutionResult,
};
use crate::{mv_memory::MvMemory, PevmTxExecutionResult};

/// Different chains may have varying reward policies.
/// This enum specifies which policy to follow, with optional
Expand Down Expand Up @@ -47,11 +44,7 @@ pub trait PevmChain: Debug {
_block_env: &BlockEnv,
txs: &[TxEnv],
) -> MvMemory {
MvMemory::new(
txs.len(),
HashMap::with_hasher(BuildIdentityHasher::default()),
LazyAddresses::default(),
)
MvMemory::new(txs.len(), [], [])
}

/// Get [Handler]
Expand Down
8 changes: 2 additions & 6 deletions src/chain/ethereum.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,7 @@ use revm::{

use super::{PevmChain, RewardPolicy};
use crate::{
mv_memory::{LazyAddresses, MvMemory},
BuildIdentityHasher, MemoryLocation, PevmTxExecutionResult, TxIdx,
mv_memory::MvMemory, BuildIdentityHasher, MemoryLocation, PevmTxExecutionResult, TxIdx,
};

/// Implementation of [PevmChain] for Ethereum
Expand Down Expand Up @@ -143,10 +142,7 @@ impl PevmChain for PevmEthereum {
(0..block_size).collect::<Vec<TxIdx>>(),
);

let mut lazy_addresses = LazyAddresses::default();
lazy_addresses.0.insert(block_env.coinbase);

MvMemory::new(block_size, estimated_locations, lazy_addresses)
MvMemory::new(block_size, estimated_locations, [block_env.coinbase])
}

fn get_handler<'a, EXT, DB: revm::Database>(
Expand Down
31 changes: 17 additions & 14 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,12 @@ use std::hash::{BuildHasherDefault, Hasher};
use alloy_primitives::{Address, B256, U256};
use smallvec::SmallVec;

// We take the last 8 bytes of an address as its hash. This
// seems fine as the addresses themselves are hash suffixes,
// and precompiles' suffix should be unique, too.
/// We use the last 8 bytes of an existing hash, like an address
/// or a code hash, instead of rehashing it.
// TODO: Make sure this is acceptable for production
#[derive(Default)]
struct AddressHasher(u64);
impl Hasher for AddressHasher {
#[derive(Debug, Default)]
pub struct SuffixHasher(u64);
impl Hasher for SuffixHasher {
fn write(&mut self, bytes: &[u8]) {
let mut suffix = [0u8; 8];
suffix.copy_from_slice(&bytes[bytes.len() - 8..]);
Expand All @@ -24,7 +23,9 @@ impl Hasher for AddressHasher {
self.0
}
}
type BuildAddressHasher = BuildHasherDefault<AddressHasher>;

/// Build a suffix hasher
pub type BuildSuffixHasher = BuildHasherDefault<SuffixHasher>;

#[derive(Clone, Debug, PartialEq, Eq, Hash)]
enum MemoryLocation {
Expand All @@ -40,10 +41,10 @@ enum MemoryLocation {
// on every single lookup & validation.
type MemoryLocationHash = u64;

// This is primarily used for memory location hash, but can also be used for
// transaction indexes, etc.
#[derive(Default)]
struct IdentityHasher(u64);
/// This is primarily used for memory location hash, but can also be used for
/// transaction indexes, etc.
#[derive(Debug, Default)]
pub struct IdentityHasher(u64);
impl Hasher for IdentityHasher {
fn write_u64(&mut self, id: u64) {
self.0 = id;
Expand All @@ -58,7 +59,9 @@ impl Hasher for IdentityHasher {
unreachable!()
}
}
type BuildIdentityHasher = BuildHasherDefault<IdentityHasher>;

/// Build an identity hasher
pub type BuildIdentityHasher = BuildHasherDefault<IdentityHasher>;

// TODO: It would be nice if we could tie the different cases of
// memory locations & values at the type level, to prevent lots of
Expand Down Expand Up @@ -215,8 +218,8 @@ pub use pevm::{execute, execute_revm_parallel, execute_revm_sequential, PevmErro
mod scheduler;
mod storage;
pub use storage::{
AccountBasic, Bytecodes, EvmAccount, EvmCode, InMemoryStorage, RpcStorage, Storage,
StorageWrapper,
AccountBasic, BlockHashes, Bytecodes, ChainState, EvmAccount, EvmCode, InMemoryStorage,
RpcStorage, Storage, StorageWrapper,
};
mod vm;
pub use vm::{ExecutionError, PevmTxExecutionResult};
26 changes: 10 additions & 16 deletions src/mv_memory.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
use std::{collections::BTreeMap, sync::Mutex};
use std::{
collections::{BTreeMap, HashSet},
sync::Mutex,
};

use ahash::AHashSet;
use alloy_primitives::Address;
use dashmap::{mapref::one::Ref, DashMap};

use crate::{
BuildAddressHasher, BuildIdentityHasher, MemoryEntry, MemoryLocationHash, NewLazyAddresses,
BuildIdentityHasher, BuildSuffixHasher, MemoryEntry, MemoryLocationHash, NewLazyAddresses,
ReadOrigin, ReadSet, TxIdx, TxVersion, WriteSet,
};

Expand All @@ -16,13 +18,7 @@ struct LastLocations {
write: Vec<MemoryLocationHash>,
}

#[derive(Debug)]
pub(crate) struct LazyAddresses(pub(crate) AHashSet<Address, BuildAddressHasher>);
impl Default for LazyAddresses {
fn default() -> Self {
LazyAddresses(AHashSet::with_hasher(BuildAddressHasher::default()))
}
}
type LazyAddresses = HashSet<Address, BuildSuffixHasher>;

/// The MvMemory contains shared memory in a form of a multi-version data
/// structure for values written and read by different transactions. It stores
Expand All @@ -46,7 +42,7 @@ impl MvMemory {
pub(crate) fn new(
block_size: usize,
estimated_locations: impl IntoIterator<Item = (MemoryLocationHash, Vec<TxIdx>)>,
lazy_addresses: LazyAddresses,
lazy_addresses: impl IntoIterator<Item = Address>,
) -> Self {
// TODO: Fine-tune the number of shards, like to the next power of two from the
// number of worker threads.
Expand All @@ -67,7 +63,7 @@ impl MvMemory {
Self {
data,
last_locations: (0..block_size).map(|_| Mutex::default()).collect(),
lazy_addresses: Mutex::new(lazy_addresses),
lazy_addresses: Mutex::new(LazyAddresses::from_iter(lazy_addresses)),
}
}

Expand Down Expand Up @@ -105,7 +101,7 @@ impl MvMemory {
if !new_lazy_addresses.is_empty() {
let mut lazy_addresses = self.lazy_addresses.lock().unwrap();
for address in new_lazy_addresses {
lazy_addresses.0.insert(address);
lazy_addresses.insert(address);
}
}

Expand Down Expand Up @@ -196,9 +192,7 @@ impl MvMemory {
}

pub(crate) fn consume_lazy_addresses(&self) -> impl IntoIterator<Item = Address> {
std::mem::take(&mut *self.lazy_addresses.lock().unwrap())
.0
.into_iter()
std::mem::take(&mut *self.lazy_addresses.lock().unwrap()).into_iter()
}

pub(crate) fn consume_location(
Expand Down
17 changes: 14 additions & 3 deletions src/storage.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::{fmt::Display, sync::Arc};
use std::{collections::HashMap, fmt::Display, sync::Arc};

use ahash::AHashMap;
use alloy_primitives::{Address, Bytes, B256, U256};
Expand All @@ -10,6 +10,11 @@ use revm::{
};
use serde::{Deserialize, Serialize};

use crate::{BuildIdentityHasher, BuildSuffixHasher};

// TODO: Port EVM types to [primitives.rs] to focus solely
// on the [Storage] interface here.

/// An EVM account.
#[derive(Debug, Default, Clone, PartialEq, Serialize, Deserialize)]
pub struct EvmAccount {
Expand Down Expand Up @@ -101,8 +106,14 @@ impl From<Bytecode> for EvmCode {
}
}

/// Mapping between code hashes and [EvmCode] values
pub type Bytecodes = AHashMap<B256, EvmCode>;
/// Mapping from address to [EvmAccount]
pub type ChainState = HashMap<Address, EvmAccount, BuildSuffixHasher>;

/// Mapping from code hashes to [EvmCode]s
pub type Bytecodes = HashMap<B256, EvmCode, BuildSuffixHasher>;

/// Mapping from block numbers to block hashes
pub type BlockHashes = HashMap<u64, B256, BuildIdentityHasher>;

/// An interface to provide chain state to Pevm for transaction execution.
/// Staying close to the underlying REVM's Database trait while not leaking
Expand Down
13 changes: 5 additions & 8 deletions src/storage/in_memory.rs
Original file line number Diff line number Diff line change
@@ -1,19 +1,16 @@
use std::{collections::HashMap, fmt::Debug};
use std::fmt::Debug;

use ahash::AHashMap;
use alloy_primitives::{keccak256, Address, B256, U256};

use super::{Bytecodes, EvmCode};
use crate::{AccountBasic, BuildAddressHasher, EvmAccount, Storage};

type Accounts = HashMap<Address, EvmAccount, BuildAddressHasher>;
use super::{BlockHashes, Bytecodes, ChainState, EvmCode};
use crate::{AccountBasic, EvmAccount, Storage};

/// A storage that stores chain data in memory.
#[derive(Debug, Default, Clone)]
pub struct InMemoryStorage<'a> {
accounts: Accounts,
accounts: ChainState,
bytecodes: Option<&'a Bytecodes>,
block_hashes: AHashMap<u64, B256>,
block_hashes: BlockHashes,
}

impl<'a> InMemoryStorage<'a> {
Expand Down
14 changes: 7 additions & 7 deletions src/storage/rpc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ use tokio::runtime::Runtime;

use crate::{AccountBasic, EvmAccount, Storage};

use super::EvmCode;
use super::{BlockHashes, Bytecodes, ChainState, EvmCode};

type RpcProvider<N> = RootProvider<Http<Client>, N>;

Expand All @@ -34,9 +34,9 @@ pub struct RpcStorage<N> {
// execution on the same block.
// Using a [Mutex] so we don't propagate mutability requirements back
// to our [Storage] trait and meet [Send]/[Sync] requirements for Pevm.
cache_accounts: Mutex<AHashMap<Address, EvmAccount>>,
cache_bytecodes: Mutex<AHashMap<B256, EvmCode>>,
cache_block_hashes: Mutex<AHashMap<u64, B256>>,
cache_accounts: Mutex<ChainState>,
cache_bytecodes: Mutex<Bytecodes>,
cache_block_hashes: Mutex<BlockHashes>,
// TODO: Better async handling.
runtime: Runtime,
}
Expand All @@ -57,17 +57,17 @@ impl<N> RpcStorage<N> {
}

/// Get a snapshot of accounts
pub fn get_cache_accounts(&self) -> AHashMap<Address, EvmAccount> {
pub fn get_cache_accounts(&self) -> ChainState {
self.cache_accounts.lock().unwrap().clone()
}

/// Get a snapshot of bytecodes
pub fn get_cache_bytecodes(&self) -> AHashMap<B256, EvmCode> {
pub fn get_cache_bytecodes(&self) -> Bytecodes {
self.cache_bytecodes.lock().unwrap().clone()
}

/// Get a snapshot of block hashes
pub fn get_cache_block_hashes(&self) -> AHashMap<u64, B256> {
pub fn get_cache_block_hashes(&self) -> BlockHashes {
self.cache_block_hashes.lock().unwrap().clone()
}
}
Expand Down
12 changes: 6 additions & 6 deletions src/vm.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use ahash::{AHashMap, HashMapExt};
use ahash::HashMapExt;
use alloy_rpc_types::Receipt;
use dashmap::DashMap;
use defer_drop::DeferDrop;
Expand All @@ -15,9 +15,9 @@ use std::collections::HashMap;
use crate::{
chain::{PevmChain, RewardPolicy},
mv_memory::MvMemory,
AccountBasic, BuildIdentityHasher, EvmAccount, MemoryEntry, MemoryLocation, MemoryLocationHash,
MemoryValue, NewLazyAddresses, ReadError, ReadOrigin, ReadSet, Storage, TxIdx, TxVersion,
WriteSet,
AccountBasic, BuildIdentityHasher, BuildSuffixHasher, EvmAccount, MemoryEntry, MemoryLocation,
MemoryLocationHash, MemoryValue, NewLazyAddresses, ReadError, ReadOrigin, ReadSet, Storage,
TxIdx, TxVersion, WriteSet,
};

/// The execution error from the underlying EVM executor.
Expand All @@ -27,7 +27,7 @@ pub type ExecutionError = EVMError<ReadError>;
/// Represents the state transitions of the EVM accounts after execution.
/// If the value is [None], it indicates that the account is marked for removal.
/// If the value is [Some(new_state)], it indicates that the account has become [new_state].
type EvmStateTransitions = AHashMap<Address, Option<EvmAccount>>;
type EvmStateTransitions = HashMap<Address, Option<EvmAccount>, BuildSuffixHasher>;

/// Execution result of a transaction
#[derive(Debug, Clone, PartialEq)]
Expand Down Expand Up @@ -507,7 +507,7 @@ pub(crate) struct Vm<'a, S: Storage, C: PevmChain> {
spec_id: SpecId,
beneficiary_location_hash: MemoryLocationHash,
reward_policy: RewardPolicy,
new_bytecodes: DeferDrop<DashMap<B256, Bytecode>>,
new_bytecodes: DeferDrop<DashMap<B256, Bytecode, BuildSuffixHasher>>,
}

impl<'a, S: Storage, C: PevmChain> Vm<'a, S, C> {
Expand Down
11 changes: 2 additions & 9 deletions tests/common/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,14 @@ use std::{
io::BufReader,
};

use ahash::AHashMap;
use alloy_primitives::{Address, Bloom, Bytes, B256, U256};
use alloy_rpc_types::{Block, Header};
use pevm::{Bytecodes, EvmAccount, InMemoryStorage};
use pevm::{BlockHashes, Bytecodes, EvmAccount, InMemoryStorage};

pub mod runner;
pub use runner::{assert_execution_result, mock_account, test_execute_alloy, test_execute_revm};
pub mod storage;

pub type ChainState = AHashMap<Address, EvmAccount>;
pub type BlockHashes = AHashMap<u64, B256>;

pub static MOCK_ALLOY_BLOCK_HEADER: Header = Header {
// Minimal requirements for execution
number: Some(1),
Expand Down Expand Up @@ -74,10 +70,7 @@ pub fn for_each_block_from_disk(mut handler: impl FnMut(Block, InMemoryStorage))
let block_hashes: BlockHashes =
File::open(format!("data/blocks/{block_number}/block_hashes.json"))
.map(|file| {
type SerializedFormat = HashMap<u64, B256, ahash::RandomState>;
serde_json::from_reader::<_, SerializedFormat>(BufReader::new(file))
.unwrap()
.into()
serde_json::from_reader::<_, BlockHashes>(BufReader::new(file)).unwrap()
})
.unwrap_or_default();

Expand Down
2 changes: 1 addition & 1 deletion tests/erc20/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ fn erc20_clusters() {
const NUM_TRANSFERS_PER_PERSON: usize = 15;

let mut final_state = AHashMap::from([(Address::ZERO, EvmAccount::default())]); // Beneficiary
let mut final_bytecodes = Bytecodes::new();
let mut final_bytecodes = Bytecodes::default();
let mut final_txs = Vec::<TxEnv>::new();
for _ in 0..NUM_CLUSTERS {
let (state, bytecodes, txs) = generate_cluster(
Expand Down
Loading

0 comments on commit cce4fc9

Please sign in to comment.