-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
124 lines (105 loc) · 3.11 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import argparse
import pathlib
from src.pipeline import PipelineBuilder, JoinerTypes, SamplerTypes, SolverTypes
from src.search import search
def config_argparser() -> argparse.ArgumentParser:
    """Build the command-line parser for this script.

    Two optional string options are registered, both defaulting to ``None``:

    * ``--search``: a term to search for.
    * ``--data_path``: path of a headlines file to load instead of searching.

    Returns:
        The configured parser. Nothing is parsed here; the caller invokes
        ``parse_args`` itself.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--search",
        type=str,
        required=False,
        default=None,
        help="What to search for in Google News",
    )
    parser.add_argument(
        "--data_path",
        type=str,
        required=False,
        default=None,
        # The loader in this file skips the first line and splits every row
        # on ';', so the help text spells out that layout.
        help=(
            "The path to a ';'-separated csv file with a header row and two "
            "columns ('title' and 'media') to process"
        ),
    )
    return parser
def search_and_summarize(
    search_str: str,
    sampler_type: SamplerTypes,
    joiner_type: JoinerTypes,
    solver_type: SolverTypes,
    sampler_kwargs: dict = None,
    joiner_kwargs: dict = None,
    solver_kwargs: dict = None,
) -> str:
    """Search for a term and summarize whatever the search returns.

    The term is handed to ``search``; its result is forwarded unchanged to
    ``summarize`` together with the pipeline component types and their
    optional keyword-argument dicts.

    Returns:
        The value produced by ``summarize``.
    """
    print(f"[LOG] Searching for term: {search_str}")
    headlines = search(search_str)

    print("[LOG] Summarizing")
    return summarize(
        headlines=headlines,
        sampler_type=sampler_type,
        joiner_type=joiner_type,
        solver_type=solver_type,
        sampler_kwargs=sampler_kwargs,
        joiner_kwargs=joiner_kwargs,
        solver_kwargs=solver_kwargs,
    )
def load_and_summarize(
    data_path: str,
    sampler_type: SamplerTypes,
    joiner_type: JoinerTypes,
    solver_type: SolverTypes,
    sampler_kwargs: dict = None,
    joiner_kwargs: dict = None,
    solver_kwargs: dict = None,
) -> str:
    """Load headlines from a ';'-separated file and summarize them.

    The first line of the file is treated as a header and skipped. Every
    other non-blank line must contain exactly two ';'-separated fields, the
    title and the media, which are stripped of surrounding whitespace and
    collected as ``{"title": ..., "media": ...}`` dicts. The collected list is
    then passed to ``summarize`` with the remaining arguments.

    Returns:
        The value produced by ``summarize``.

    Raises:
        ValueError: If a non-blank data line does not split into exactly
            two fields.
    """
    data_path = pathlib.Path(data_path)
    print(f" [LOG] Reading {data_path.resolve()}")

    headlines = []
    # NOTE(review): no explicit encoding is passed, so the platform default
    # is used -- confirm which encoding the data files are written in.
    with open(data_path, "r") as file:
        for line_no, line in enumerate(file, start=1):
            # Line 1 is the header row; blank lines (e.g. trailing newlines
            # at the end of the file) carry no headline and are skipped
            # instead of crashing the unpacking below.
            if line_no == 1 or not line.strip():
                continue

            fields = line.split(";")
            if len(fields) != 2:
                raise ValueError(
                    f"{data_path}: line {line_no}: expected exactly two "
                    f"';'-separated fields (title;media), got {line.rstrip()!r}"
                )

            title, media = fields
            # str.strip() already removes newlines along with other
            # surrounding whitespace.
            headlines.append({"title": title.strip(), "media": media.strip()})

    print("[LOG] Summarizing")
    return summarize(
        headlines,
        sampler_type,
        joiner_type,
        solver_type,
        sampler_kwargs,
        joiner_kwargs,
        solver_kwargs,
    )
def summarize(
    headlines: list[dict],
    sampler_type: SamplerTypes,
    joiner_type: JoinerTypes,
    solver_type: SolverTypes,
    sampler_kwargs: dict = None,
    joiner_kwargs: dict = None,
    solver_kwargs: dict = None,
) -> str:
    """Run the headlines through a freshly built pipeline.

    A pipeline is built by ``PipelineBuilder.build`` from the three component
    types, then run on ``headlines`` with the three optional keyword-argument
    dicts passed through as given.

    Returns:
        Whatever the pipeline's ``run`` call returns.
    """
    return PipelineBuilder.build(sampler_type, joiner_type, solver_type).run(
        headlines,
        sampler_kwargs,
        joiner_kwargs,
        solver_kwargs,
    )
if __name__ == "__main__":
    # Keep a reference to the parser so usage errors can be reported through it.
    parser = config_argparser()
    args = parser.parse_args()

    # Pipeline components used for every command-line run.
    sampler = SamplerTypes.RANDOM
    joiner = JoinerTypes.WITH_SOURCE
    solver = SolverTypes.AS_IS

    # --search is checked first, so it wins when both options are supplied.
    if args.search is not None:
        print(search_and_summarize(args.search, sampler, joiner, solver))
    elif args.data_path is not None:
        print(load_and_summarize(args.data_path, sampler, joiner, solver))
    else:
        # A missing option is a usage error, not a programming error:
        # parser.error prints the usage line plus this message to stderr and
        # exits with status 2 instead of dumping a traceback.
        parser.error(
            "One should give a search term or a data_path where to load headlines from!"
        )