-
Notifications
You must be signed in to change notification settings - Fork 4
/
analyzeNtuples.py
137 lines (113 loc) · 4.73 KB
/
analyzeNtuples.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import sys
import typer
import yaml
import importlib
import pathlib
from rich import print as pprint
from python.parameters import Parameters, get_collection_parameters
from python.analyzer import analyze
from python.submission import to_HTCondor
from python.timecounter import print_stats
import ROOT
# Free-text overview of what this script does and of where the actual
# analysis logic lives. Nothing in the visible part of this file reads this
# variable.
description = """
Main script for L1 TP analysis.
The script reads the configuration,
opens the input and output files for the given sample,
runs the event loop and saves histograms to disk.
All the analysis logic is anyhow elsewhere:
Data:
which data are potentially read is handled in the `collections` module.
How to select the data is handled in the `selections` module.
Plotters:
what to do with the data is handled in the `plotters` module
Histograms:
which histograms are produced is handled in the
`histos` module (and the plotters).
"""
@print_stats
def analyzeNtuples(  # noqa: PLR0913
    configfile: str = typer.Option(..., '-f', '--file', help='specify the yaml configuration file'),
    datasetfile: str = typer.Option(
        ..., '-i', '--input-dataset', help='specify the yaml file defining the input dataset'
    ),
    collection: str = typer.Option(..., '-p', '--plotters', help='specify the plotters to be run'),
    sample: str = typer.Option(
        ...,
        '-s',
        '--sample',
        help='specify the sample to be processed',
    ),
    debug: int = typer.Option(0, '-d', '--debug', help='debug level'),
    nevents: int = typer.Option(10, '-n', '--nevents', help='# of events to process per sample'),
    batch: bool = typer.Option(None, '-b', '--batch', help='submit the jobs via CONDOR'),
    run: str = typer.Option(None, '-r', '--run', help='the batch_id to run (need to be used with the option -b)'),
    outdir: str = typer.Option(None, '-o', '--outdir', help='override the output directory for the files'),
    local: bool = typer.Option(False, '-l', '--local', help='run the batch on local resources'),
    workers: int = typer.Option(2, '-j', '--jobworkers', help='# of local workers'),
    workdir: str = typer.Option(None, '-w', '--workdir', help='local work directory'),
    submit: bool = typer.Option(False, '-S', '--submit', help='submit the jobs via CONDOR'),
):
    """Run the L1 TP ntuple analysis for one plotter collection and sample.

    Reads the yaml configuration and dataset files, selects the requested
    samples, hands them to the HTCondor submission helper and then runs the
    analysis on each selected sample.
    """
    # NOTE: typer uses the docstring above as the command help text, so it is
    # kept short. Every option is described by its own `help=` string in the
    # signature. Developer-facing contract:
    #   Returns: the sum of the values returned by `analyze` over all processed
    #            samples (accumulated in `ret_nevents`).
    #   Raises:  ValueError if --submit and --local are given without --workdir.
    #   Exits:   status 0 after listing the available collections or samples
    #            (empty --plotters / --sample); status 10 when the requested
    #            collection or sample is not found in the configuration.
    if submit and local and not workdir:
        raise ValueError('The --workdir option is required when submitting jobs locally')

    def parse_yaml(filename):
        """Parse the yaml file `filename` and return the loaded content."""
        with open(filename) as stream:
            # NOTE(review): FullLoader is not meant for untrusted input. The
            # files here are chosen on the command line by the operator;
            # presumably they need more than the safe loader offers - confirm
            # before switching to yaml.safe_load.
            return yaml.load(stream, Loader=yaml.FullLoader)

    cfgfile = {}

    # we load the python module with the same name as the yaml file:
    # the path components (extension dropped) become a dotted module name,
    # and the imported module is registered in sys.modules under that name.
    pymodule_path = pathlib.Path(configfile.split('.yaml')[0])
    formatted_path = '.'.join(pymodule_path.with_suffix('').parts)
    sys.modules[formatted_path] = importlib.import_module(formatted_path)

    # The dataset file is merged second, so its top-level keys win on clashes.
    cfgfile.update(parse_yaml(configfile))
    cfgfile.update(parse_yaml(datasetfile))

    opt = Parameters(
        {
            'COLLECTION': collection,
            'SAMPLE': sample,
            'DEBUG': debug,
            'NEVENTS': nevents,
            'BATCH': batch,
            'RUN': run,
            'OUTDIR': outdir,
            'LOCAL': local,
            'WORKERS': workers,
            'WORKDIR': workdir,
            'SUBMIT': submit,
        }
    )
    collection_params = get_collection_parameters(opt, cfgfile)

    # An empty collection or sample name is treated as a request to list what
    # is available.
    if not opt.COLLECTION:
        print(f'\nAvailable plotter collections: {collection_params.keys()}')
        sys.exit(0)
    if opt.COLLECTION not in collection_params:
        print(f'ERROR: plotter collection {opt.COLLECTION} not in the cfg file')
        sys.exit(10)
    if not opt.SAMPLE:
        print(f'Plotter collection: {opt.COLLECTION}, available samples: {collection_params[opt.COLLECTION]}')
        sys.exit(0)

    available_samples = collection_params[opt.COLLECTION]
    if opt.SAMPLE == 'all':
        samples_to_process = list(available_samples)
    else:
        # Only the first sample with a matching name is processed.
        samples_to_process = [smp for smp in available_samples if smp.name == opt.SAMPLE][:1]
        if not samples_to_process:
            # Report an unknown sample the same way as an unknown collection
            # instead of failing with a bare IndexError traceback.
            print(f'ERROR: sample {opt.SAMPLE} not available for plotter collection {opt.COLLECTION}')
            sys.exit(10)

    pprint(f'About to process samples: {samples_to_process}')

    plot_version = f"{cfgfile['common']['plot_version']}.{cfgfile['dataset']['version']}"

    # NOTE(review): called unconditionally; presumably to_HTCondor decides
    # from `opt` / `submit_mode` whether there is anything to submit - confirm
    # in python.submission.
    to_HTCondor(
        analyze=analyze,
        opt=opt,
        submit_mode=submit,
        plot_version=plot_version,
        samples_to_process=samples_to_process,
    )

    # -1 is passed to `analyze` unless both --batch and --run are given, in
    # which case the --run value is converted to an integer batch index.
    batch_idx = int(opt.RUN) if opt.BATCH and opt.RUN else -1

    ret_nevents = 0
    for idx, smp in enumerate(samples_to_process):
        pprint(
            f'\n\n========================== #{idx+1}/{len(samples_to_process)}: {smp.name} ==========================\n'
        )
        ret_nevents += analyze(smp, batch_idx=batch_idx)
    return ret_nevents
# Script entry point: when the file is executed directly, typer builds the
# command-line interface from the signature of analyzeNtuples and calls it.
if __name__ == '__main__':
    typer.run(analyzeNtuples)