From 7bc85875bad418c614366e490d0e0362926ceb46 Mon Sep 17 00:00:00 2001 From: Trent Nelson Date: Mon, 18 Jan 2021 16:27:15 -0800 Subject: [PATCH] Small Python fixes. --- python/perfecthash/analysis.py | 23 ++++++++++++++--------- python/perfecthash/commands.py | 5 +++-- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/python/perfecthash/analysis.py b/python/perfecthash/analysis.py index 005f1cbd..8f6051c1 100644 --- a/python/perfecthash/analysis.py +++ b/python/perfecthash/analysis.py @@ -1932,35 +1932,35 @@ def get_yyyy_mm_dd_subdirs(dirname): def get_csv_files(directory): import glob - return [ + return set( f for f in glob.iglob( f'{directory}/**/PerfectHashBulkCreate*.csv', recursive=True ) - ] + ) def get_all_bulk_create_parquet_files(directory): import glob - return [ + return set( f for f in glob.iglob( f'{directory}/**/PerfectHashBulkCreate*.parquet', recursive=True ) if 'failed' not in f - ] + [ + ).union(set( f for f in glob.iglob( f'{directory}/PerfectHashBulkCreate*.parquet', recursive=False ) if 'failed' not in f - ] + )) def get_best_bulk_create_parquet_files(directory): import glob - return [ + return set( f for f in glob.iglob( f'{directory}/**/PerfectHashBulkCreateBest*.parquet', recursive=True ) if 'failed' not in f - ] + ) def convert_csv_to_parquet(path, base_research_dir, out=None): if not out: @@ -2343,8 +2343,9 @@ def process_xperf_perfecthash_csv(path, out=None): 'ProcessID', 'ThreadID', 'CPU', + 'ActivityId', 'BytesRequested', - 'Success', + 'Result', ]] df.to_csv(path) @@ -2355,6 +2356,7 @@ def process_xperf_perfecthash_csv(path, out=None): #=============================================================================== def get_cache_line_coverage(df): + import numpy as np count = df.NewBestGraphCount.values[0] keys = [ f'BestGraph{i}_CountOfCacheLinesWithNumberOfAssigned_{n}' @@ -2368,8 +2370,11 @@ def get_cache_line_coverage(df): return (keys, values, attempts, columns) def ridgeline_plot(df): + import joypy + import pandas as pd import matplotlib.pyplot as plt - plt.ioff() + from matplotlib import cm + #plt.ioff() keys_name = df.KeysName.values[0] hash_func = df.HashFunction.values[0] best_coverage_type = df.BestCoverageType.values[0] diff --git a/python/perfecthash/commands.py b/python/perfecthash/commands.py index 9aebb48a..26edc6d2 100644 --- a/python/perfecthash/commands.py +++ b/python/perfecthash/commands.py @@ -498,8 +498,9 @@ class PathArg(PathInvariant): def run(self): + from os.path import basename from .analysis import convert_csv_to_parquet - convert_csv_to_parquet(self._path, self._out) + convert_csv_to_parquet(self._path, basename(self._path)) class ConvertAllCsvToParquet(InvariantAwareCommand): """ @@ -526,7 +527,7 @@ def run(self): if path: from os.path import basename - path = basename(path) + base = basename(path) else: path = base