# librispeech_demo.py
#
# Usage:
#   python librispeech_demo.py <thread_shard_count> <compression_type> <sample_count> <runs>
import os
import sys
import time
import numpy as np
import pandas as pd
import seaborn as sns
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import tensorflow as tf
import matplotlib.pylab as plt
from pathlib import Path
from presto import pipeline
from presto.analysis import StrategyAnalysis
from presto.strategy import Strategy
from librispeech_pipeline import pipeline_definition
# Command-line arguments (all positional, all required):
#   1. thread_shard_count - used for both the thread count and the shard count
#   2. compression_type   - forwarded unchanged to every Strategy
#   3. sample_count       - number of samples handed to profile_strategy
#   4. runs               - handed to profile_strategy as runs_total
if len(sys.argv) < 5:
    # Exit with a usage hint instead of a bare IndexError on a missing argument.
    sys.exit(
        f"usage: {sys.argv[0]} "
        "<thread_shard_count> <compression_type> <sample_count> <runs>"
    )

thread_shard_count = int(sys.argv[1])
compression_type = sys.argv[2]  # argv entries are already str
sample_count = int(sys.argv[3])
runs = int(sys.argv[4])

# Fixed settings for this demo run.
storage_type = "remote"  # passed to every Strategy as storage_type
source_path = "/dataset/LibriSpeech/train-clean-100"  # input for pipeline_definition
target_path = "/tmp"  # parent of the "librispeech-split" shard directory prefix
log_path = "/logs"  # path given to save_dfs_as_csv
# Build the pipeline description from the dataset's source directory.
librispeech_pipeline = pipeline_definition(source_path=source_path)

# Candidate step indices: one per pipeline entry, minus index 1, so that the
# "1-list-files" strategy is not profiled.
librispeech_pipeline_steps = [*range(len(librispeech_pipeline))]
librispeech_pipeline_steps.pop(1)
# A single thread/shard configuration is profiled per invocation; both counts
# come from the same command-line value.
thread_counts = [thread_shard_count]
shard_counts = [thread_shard_count]
# Materialized as a list: a bare zip() is a one-shot iterator and would be
# left empty once the comprehension below has consumed it.
thread_shard_counts = list(zip(thread_counts, shard_counts))

# One Strategy per (thread/shard pair, step index). Step 0 is passed as
# split_position=None; every other step index is passed through unchanged.
strategies = [
    Strategy(
        pipeline=librispeech_pipeline,
        split_position=None if step == 0 else step,
        shard_count=shard_count,
        thread_count=thread_count,
        shard_directory_prefix=f"{target_path}/librispeech-split",
        compression_type=compression_type,
        storage_type=storage_type,
    )
    for thread_count, shard_count in thread_shard_counts
    for step in librispeech_pipeline_steps
]
sample_counts = [sample_count]
runs_total = runs

# Profile every strategy once per requested sample count, with the system
# cache enabled, and print its statistics right after it has been profiled.
# The loop variable has its own name so the command-line `sample_count` is
# not rebound by the loop.
# NOTE(review): print_stats() is assumed to run once per profiled strategy
# (inner loop body) - confirm against the intended nesting.
for current_sample_count in sample_counts:
    for strategy in strategies:
        strategy.profile_strategy(
            sample_count=current_sample_count,
            runs_total=runs_total,
            system_cache_enabled=True,
        )
        strategy.print_stats()
# Gather one profile dataframe and one dstat dataframe per strategy, in the
# same order as `strategies`, and hand both lists to StrategyAnalysis.
strategy_dfs = [strat.profile_as_df() for strat in strategies]
dstat_dfs = [strat.profile_as_dstat_df() for strat in strategies]
strat_analysis = StrategyAnalysis(
    strategy_dataframes=strategy_dfs,
    dstat_dataframes=dstat_dfs,
)

# Save the collected dataframes via save_dfs_as_csv, using log_path as the
# path and "librispeech" as the prefix (plain literal: no placeholders needed).
strat_analysis.save_dfs_as_csv(path=log_path, prefix="librispeech")