-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy patheval.py
54 lines (39 loc) · 1.82 KB
/
eval.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
from tabulate import tabulate
import argparse
# Command-line interface of the evaluation script: it takes the ground-truth
# labels, the bin assignments, and an optional switch for printing the
# assignment table.
parser = argparse.ArgumentParser(
    description='Evaluate LRBinner bins against ground truth labels.')
parser.add_argument(
    '--truth', '-t',
    help="Path to text file with ground truth. Items with label Unknown will be ignored.",
    type=str,
    required=True)
parser.add_argument(
    '--bins', '-b',
    help="Path of bins.txt file from LRBinner.",
    type=str,
    required=True)
parser.add_argument(
    '--print', '-p',
    help="Print assignments in tabular form.",
    action='store_true')

args = parser.parse_args()

# Plain module-level names for the parsed options; the rest of the script
# reads these rather than the argparse namespace.
truth = args.truth
bins = args.bins
tab = args.print
def clusters_table(clusters, truth, tab):
    """Print precision, recall, F1-score and bin count for a binning result.

    A contingency matrix is built with one row per distinct truth label and
    one column per distinct bin. From it:

    * precision - for every bin take its largest single-label count, sum
      those and divide by the number of counted items;
    * recall - for every truth label take its largest single-bin count, sum
      those and divide by the number of counted items;
    * F1-score - harmonic mean of the two.

    Args:
        clusters: Bin assignment of each item.
        truth: Truth label of each item, aligned with ``clusters``. The two
            are paired with ``zip``, so surplus entries of the longer one are
            not counted.
        tab: When truthy, the contingency table is printed with ``tabulate``
            before the metrics.

    Returns:
        None. All results are written to standard output. When nothing can
        be counted (empty input) every metric is reported as 0.
    """
    # dict.fromkeys de-duplicates while keeping first-seen order, so the bin
    # numbering and the table layout are the same on every run (iteration
    # order of a set of strings changes with hash randomisation).
    bin_ids = list(dict.fromkeys(clusters))
    labels = list(dict.fromkeys(truth))
    c_map = {bin_id: col for col, bin_id in enumerate(bin_ids)}
    t_map = {label: row for row, label in enumerate(labels)}

    # counts[row][col] = number of items with truth label `row` in bin `col`.
    counts = [[0] * len(bin_ids) for _ in labels]
    for c, t in zip(clusters, truth):
        counts[t_map[t]][c_map[c]] += 1

    total = sum(sum(row) for row in counts)
    if total:
        # Rows are truth labels -> row maxima measure recall;
        # columns are bins -> column maxima measure precision.
        recall = sum(max(row) for row in counts) / total
        precision = sum(max(col) for col in zip(*counts)) / total
    else:
        # Nothing was counted: avoid indexing/dividing on an empty matrix.
        recall = precision = 0.0

    if recall + precision:
        f1_score = 2 * recall * precision / (recall + precision)
    else:
        f1_score = 0.0

    if tab:
        header = ["_"] + [f"Bin-{c_map[x]}_({x})" for x in bin_ids]
        body = [[label] + row for label, row in zip(labels, counts)]
        print(tabulate([header] + body, tablefmt="plain"))
        print()

    # Each label is paired with the quantity it names.
    print(f"Precision\t{precision*100:10.2f}")
    print(f"Recall \t{recall*100:10.2f}")
    print(f"F1-Score \t{f1_score*100:10.2f}")
    print(f"Bins \t{len(bin_ids):10}")
# Load both inputs as lists of lines. `truth` and `bins` hold the file paths
# on entry and are rebound to the file contents; the context managers make
# sure both handles are closed once read.
with open(truth) as truth_file:
    truth = truth_file.read().strip().split("\n")
with open(bins) as bins_file:
    bins = bins_file.read().strip().split("\n")

# Line i of the bins file is paired with line i of the truth file. Pairs
# whose truth label is "unknown" (any letter case) are left out of the
# evaluation.
kept = [(b, t) for b, t in zip(bins, truth) if t.lower() != "unknown"]
bins_f = [b for b, _ in kept]
truth_f = [t for _, t in kept]

clusters_table(bins_f, truth_f, tab)