-
Notifications
You must be signed in to change notification settings - Fork 0
/
collect_tags.py
executable file
·146 lines (113 loc) · 3.1 KB
/
collect_tags.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
#!/usr/bin/python
"""
Collect named tags.
"""
from sys import argv,stdin,stderr,exit
from math import ceil
def usage(s=None):
    """Show the usage text and terminate the program.

    The text is handed to ``sys.exit`` (imported here as ``exit``), so the
    interpreter writes it to stderr and ends with a failure status.

    Args:
        s: Optional error message. When given, it is shown on its own line
            ahead of the usage text.

    Raises:
        SystemExit: always; its payload is the text described above.
    """
    message = """
usage: cat items_file | collect_tags [options]
--separator=<separator> separator for tags
(default is comma)
--head=<number> limit the number of input lines
The input consists of lines of (group,tag) pairs. Additional columns are
ignored.
E88BQJZ01 A 8894
E88BQJZ01 B 140509
E88BQJZ01 C 4638
E88BQJZ02 B 135134
E88BQJZ02 C 4274
FH0VK6D01 A 40470
FH0VK6D01 C 22830
FH0VK6D02 A 47624
Output looks like this:
E88BQJZ01 A,B,C
E88BQJZ02 B,C
FH0VK6D01 A,C
FH0VK6D02 A"""

    # Identity test: only a genuinely absent message selects the bare usage
    # text (an equality test can be fooled by objects with a custom __eq__).
    if s is None:
        exit(message)
    exit("%s\n%s" % (s, message))
def main():
    """Collect the tags seen for each group on stdin and print them.

    Command-line options (read from ``argv``):
        --separator=<sep>, --sep=<sep>  text placed between a group's tags;
                                        the names "tab", "space" and "none"
                                        select a tab, a blank and the empty
                                        string (default is a comma)
        --head=<number>                 stop after this many input lines
                                        (parsed by int_with_unit)
        --debug, --debug=<a,b,...>      recorded in the module-level
                                        ``debug`` list
    Anything else is reported through usage(), which exits.

    Each input line is split on whitespace; the first field is the group
    and the second is the tag, further fields are ignored.  Groups are
    printed in order of first appearance, one per line, as
    "<group><TAB><tags>", with each group's distinct tags in the order
    they were first seen.

    Raises:
        AssertionError: an input line has fewer than two fields.
    """
    global debug

    # parse the command line

    separator = ","
    headLimit = None
    debug     = []

    namedSeparators = {"tab": "\t", "space": " ", "none": ""}

    for arg in argv[1:]:
        if "=" in arg:
            argVal = arg.split("=", 1)[1]

        if arg.startswith(("--separator=", "--sep=")):
            separator = namedSeparators.get(argVal, argVal)
        elif arg.startswith("--head="):
            headLimit = int_with_unit(argVal)
        elif arg == "--debug":
            debug += ["debug"]
        elif arg.startswith("--debug="):
            debug += argVal.split(",")
        else:
            usage("unrecognized option: %s" % arg)

    # read the items

    keys      = []   # groups, in order of first appearance
    keyToTags = {}   # group -> distinct tags, in order of first appearance

    lineNum = 0
    for line in stdin:
        lineNum += 1
        if (headLimit is not None) and (lineNum > headLimit):
            # stderr.write rather than the print statement, so the same
            # source runs under both python 2 and python 3
            stderr.write("limit of %s lines reached\n" % commatize(headLimit))
            break

        fields = line.split()
        if len(fields) < 2:
            # raised explicitly (rather than via an assert statement) so the
            # check survives "python -O"; the exception type is unchanged
            raise AssertionError(
                "not enough fields in line %d (expected at least 2)\n%s"
                % (lineNum, line))

        (key, tag) = fields[:2]
        if key not in keyToTags:
            keys.append(key)
            keyToTags[key] = [tag]
        elif tag not in keyToTags[key]:
            keyToTags[key].append(tag)

    # report the items

    for key in keys:
        # a single parenthesized argument prints identically under python 2
        # (print statement) and python 3 (print function)
        print("%s\t%s" % (key, separator.join(keyToTags[key])))
# int_with_unit--
#   Parse a string as an integer, allowing unit suffixes

def int_with_unit(s):
    """Parse a count such as "250", "10K" or "1.5M" into an integer.

    A trailing K, M or G (upper case only) multiplies the value by 1000,
    1000^2 or 1000^3.  A value that is not a plain integer literal is
    parsed as an exact rational and rounded up to the next integer.

    Args:
        s: The text to parse.

    Returns:
        The parsed value as an int.

    Raises:
        ValueError: the text (minus any unit suffix) is not a number.
    """
    # local import, so this function does not depend on the file's import list
    from fractions import Fraction

    multiplier = 1
    for (unit, scale) in (("K", 1000), ("M", 1000 * 1000), ("G", 1000 * 1000 * 1000)):
        if s.endswith(unit):
            multiplier = scale
            s = s[:-1]
            break

    try:
        return int(s) * multiplier
    except ValueError:
        pass

    # Exact rational arithmetic: going through float can land a hair above
    # the true product (e.g. 2.007 * 1000 -> 2007.0000000000002), and the
    # ceiling would then be one too large.  Fraction raises ValueError for
    # text it cannot parse, the same exception type float() raises.
    # NOTE: Fraction also accepts "a/b" forms, which float() rejected.
    value = Fraction(s) * multiplier

    # integer ceiling via floor division on the negated numerator
    return -((-value.numerator) // value.denominator)
# commatize--
#   Convert a numeric string into one with commas.

def commatize(s):
    """Insert thousands separators into the integer part of a number.

    Args:
        s: The number, either as a string or as any object whose ``str()``
            is the number's text.  A leading sign and a fractional part
            (everything from the first ".") are carried through untouched.

    Returns:
        The text with a comma between each group of three integer digits.
        If the integer part is not accepted by ``int()``, the text is
        returned unchanged.
    """
    if not isinstance(s, str):
        s = str(s)

    (prefix, val, suffix) = ("", s, "")

    # peel off the sign so it is not counted as a digit when grouping
    if val[:1] in ("-", "+"):
        (prefix, val) = (val[0], val[1:])

    if "." in val:
        (val, suffix) = val.split(".", 1)
        suffix = "." + suffix

    try:
        int(val)
    except ValueError:
        return s

    digits = len(val)
    if digits > 3:
        # the leading group holds whatever is left after whole groups of 3
        leader = digits % 3
        chunks = [val[:leader]] if leader else []
        chunks += [val[ix:ix+3] for ix in range(leader, digits, 3)]
        val = ",".join(chunks)

    return prefix + val + suffix
# Run the tool only when this file is executed as a script.
if __name__ == "__main__":
    main()