Skip to content

Commit

Permalink
Ensure that we use bytes instead of bits
Browse files Browse the repository at this point in the history
  • Loading branch information
Devesh Sarda committed Feb 15, 2024
1 parent 4919cb2 commit 3c395b8
Show file tree
Hide file tree
Showing 5 changed files with 41 additions and 18 deletions.
4 changes: 2 additions & 2 deletions simulator/configs/arvix_linear.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
"dataset_name" : "ogbn_arxiv",
"features_stats" : {
"featurizer_type" : "default",
"page_size" : "16 KB",
"feature_dimension" : 64,
"page_size" : "16.384 KB",
"feature_dimension" : 128,
"feature_size" : "float32"
}
}
4 changes: 2 additions & 2 deletions simulator/configs/arvix_neighbors.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
"dataset_name" : "ogbn_arxiv",
"features_stats" : {
"featurizer_type" : "neighbors_nearby",
"page_size" : "16 KB",
"feature_dimension" : 64,
"page_size" : "16.384 KB",
"feature_dimension" : 128,
"feature_size" : "float32"
}
}
6 changes: 3 additions & 3 deletions simulator/src/dataset_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ def load_dataset(self):
self.adjacency_map = {}
for source, target in edges_arr:
if source not in self.adjacency_map:
self.adjacency_map[source] = []
self.adjacency_map[source].append(target)
self.adjacency_map[source] = set()
self.adjacency_map[source].add(target)

def get_num_nodes(self):
return len(self.nodes)
Expand All @@ -46,7 +46,7 @@ def get_neigbhors_for_node(self, node_id):
if node_id not in self.adjacency_map:
return []

return self.adjacency_map[node_id]
return list(self.adjacency_map[node_id])

def get_num_edges(self):
return self.num_edges
22 changes: 17 additions & 5 deletions simulator/src/features_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,15 @@
import os
import math
import random
import numpy as np


class FeaturesLoader:
def __init__(self, data_loader, features_stat):
self.data_loader = data_loader
self.features_stat = features_stat
self.page_size = humanfriendly.parse_size(features_stat["page_size"])
self.feature_size = int("".join(c for c in features_stat["feature_size"] if c.isdigit()))
self.feature_size = np.dtype(features_stat["feature_size"]).itemsize
self.node_feature_size = self.feature_size * features_stat["feature_dimension"]
self.nodes_per_page = max(int(self.page_size / self.node_feature_size), 1)
self.initialize()
Expand All @@ -22,7 +23,12 @@ def initialize(self):
random.shuffle(self.node_location_map)

def get_node_page(self, src_node, neighbor_node):
return int(self.node_location_map[neighbor_node] / self.nodes_per_page)
start_node = int(self.node_location_map[neighbor_node] / self.nodes_per_page)
curr_page_nodes = set()
for node_id in range(start_node, start_node + self.nodes_per_page):
curr_page_nodes.add(node_id)

return curr_page_nodes

def get_total_file_size(self):
total_bytes = self.page_size * self.total_pages
Expand All @@ -44,11 +50,17 @@ def initialize(self):
neighbors_to_keep = min(len(all_neighbors), num_neighbors)
self.neighbors_in_page[curr_node] = all_neighbors[:neighbors_to_keep]

def get_page_for_node(self, node):
page_nodes = set()
page_nodes.add(node)
for neighbor_node in self.neighbors_in_page[node]:
page_nodes.add(neighbor_node)
return page_nodes

def get_node_page(self, src_node, neighbor_node):
if neighbor_node in self.neighbors_in_page[src_node]:
return src_node

return neighbor_node
return self.get_page_for_node(src_node)
return self.get_page_for_node(neighbor_node)


features_class_map = {"default": FeaturesLoader, "neighbors_nearby": NeighborFeaturesLoader}
Expand Down
23 changes: 17 additions & 6 deletions simulator/src/sampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,22 @@ def __init__(self, data_loader, features_loader):

def perform_sampling_for_node(self, node_id):
# Read for this node
pages_read = set()
pages_read.add(self.features_loader.get_node_page(node_id, node_id))
neighbor_nodes = self.data_loader.get_neigbhors_for_node(node_id)
if len(neighbor_nodes) == 0:
return 0

# Read the features for this node's neighbors
for neighbor in self.data_loader.get_neigbhors_for_node(node_id):
pages_read.add(self.features_loader.get_node_page(node_id, neighbor))
# Load the nodes features
nodes_features_loaded = self.features_loader.get_node_page(node_id, node_id)
pages_loaded = 1
log_value = len(neighbor_nodes) > len(nodes_features_loaded)

return len(pages_read)
for neighbor in neighbor_nodes:
if neighbor in nodes_features_loaded:
continue

# We haven't loaded the page for this node
neighbors_page = self.features_loader.get_node_page(node_id, neighbor)
nodes_features_loaded.update(neighbors_page)
pages_loaded += 1

return pages_loaded

0 comments on commit 3c395b8

Please sign in to comment.