diff --git a/.gitignore b/.gitignore
index a21e76b8..e62f4cbb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -174,4 +174,5 @@ Thumbs.db
 
 src/cpp/third_party
 test_datasets
-simulator/datasets
\ No newline at end of file
+simulator/datasets
+simulator/images
\ No newline at end of file
diff --git a/simulator/configs/arvix.json b/simulator/configs/arvix.json
index 5fc912d3..ae7963ff 100644
--- a/simulator/configs/arvix.json
+++ b/simulator/configs/arvix.json
@@ -1,6 +1,7 @@
 {
     "dataset_name" : "ogbn_arxiv",
     "features_stats" : {
+        "feature_layout" : "random",
         "page_size" : "16 KB",
         "feature_dimension" : 128,
         "feature_size" : "float32"
diff --git a/simulator/images/arvix.png b/simulator/images/arvix.png
deleted file mode 100644
index 08be886b..00000000
Binary files a/simulator/images/arvix.png and /dev/null differ
diff --git a/simulator/main.py b/simulator/main.py
index 7efc8c17..bd59c424 100644
--- a/simulator/main.py
+++ b/simulator/main.py
@@ -16,13 +16,12 @@ def read_config_file(config_file):
 
 def read_arguments():
     parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
-    parser.add_argument("config_file", type=str, help="The config file containing the details for the simulation")
+    parser.add_argument("--config_file", required=True, type=str, help="The config file containing the details for the simulation")
+    parser.add_argument("--save_path", required=True, type=str, help="The path to save the resulting image to")
+    parser.add_argument("--graph_title", required=True, type=str, help="The title of the saved graph")
     return parser.parse_args()
 
 
-IMAGES_SAVE_DIR = "images"
-
-
 def main():
     arguments = read_arguments()
     config = read_config_file(arguments.config_file)
@@ -44,8 +43,12 @@ def main():
     print("Got result for", len(pages_loaded), "nodes out of", len(nodes_to_sample), "nodes")
 
     # Save the histogram
-    save_path = os.path.join(IMAGES_SAVE_DIR, os.path.basename(arguments.config_file).replace("json", "png"))
-    visualize_results(pages_loaded, save_path, config["dataset_name"])
+    # A bare filename has an empty dirname and os.makedirs("") raises, so only
+    # create the parent directory when the save path actually names one.
+    save_dir = os.path.dirname(arguments.save_path)
+    if save_dir:
+        os.makedirs(save_dir, exist_ok=True)
+    visualize_results(pages_loaded, arguments.save_path, arguments.graph_title, config["dataset_name"])
 
 
 if __name__ == "__main__":
diff --git a/simulator/src/features_loader.py b/simulator/src/features_loader.py
index eb17a3ef..5cfb3872 100644
--- a/simulator/src/features_loader.py
+++ b/simulator/src/features_loader.py
@@ -1,6 +1,7 @@
 import humanfriendly
 import os
 import math
+import random
 
 
 class FeaturesLoader:
@@ -11,9 +12,17 @@ def __init__(self, data_loader, features_stat):
         self.node_feature_size = self.feature_size * features_stat["feature_dimension"]
         self.nodes_per_page = max(int(self.page_size / self.node_feature_size), 1)
 
-        self.total_pages = int(math.ceil(data_loader.get_num_nodes() / (1.0 * self.nodes_per_page)))
+        total_nodes = data_loader.get_num_nodes()
+        self.total_pages = int(math.ceil(total_nodes / (1.0 * self.nodes_per_page)))
+
+        # Maps a node id to the position of its features in the feature file: the
+        # identity keeps the sequential layout, a shuffle models a random layout.
+        self.node_location_map = list(range(total_nodes))
+        if features_stat.get("feature_layout") == "random":
+            random.shuffle(self.node_location_map)
 
     def get_node_page(self, node_id):
-        return int(node_id / self.nodes_per_page)
+        node_location = self.node_location_map[node_id]
+        return int(node_location / self.nodes_per_page)
 
     def get_total_file_size(self):
diff --git a/simulator/src/visualizer.py b/simulator/src/visualizer.py
index f75d2832..c9c2d660 100644
--- a/simulator/src/visualizer.py
+++ b/simulator/src/visualizer.py
@@ -2,14 +2,14 @@
 import os
 
 
-def visualize_results(pages_loaded, save_path, dataset_name, num_bins=50):
+def visualize_results(pages_loaded, save_path, graph_title, dataset_name, num_bins=50):
     # Create the histogram
     plt.figure()
     plt.ecdf(pages_loaded, label="CDF")
     plt.hist(pages_loaded, bins=num_bins, histtype="step", density=True, cumulative=True, label="Cumulative histogram")
     plt.xlabel("Number of pages loaded for node inference")
     plt.ylabel("Percentage of nodes")
-    plt.title("Number of pages loaded for node inference on " + dataset_name)
+    plt.title(graph_title)
     plt.xlim(0, 50)
     plt.legend()
 