-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
3 changed files
with
142 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
#!/usr/bin/python2.7 | ||
import argparse | ||
import sys | ||
import collections | ||
import itertools | ||
|
||
def make_filter(filter_list, positive, limited=False):
    '''
    Build a predicate that accepts or rejects items according to filter_list.

    When limited is true the work is delegated to make_limited_filter,
    otherwise to make_unlimited_filter; both receive filter_list and
    positive unchanged.
    Warning: If limited is True, the filter is NOT referentially transparent.
    '''
    builder = make_limited_filter if limited else make_unlimited_filter
    return builder(filter_list, positive)
|
||
def make_unlimited_filter(filter_list, positive):
    '''
    Build a stateless membership predicate over filter_list.

    A positive filter returns True for items found in filter_list; a
    negative filter returns True for items that are not found in it.
    '''
    members = set(filter_list)
    if not positive:
        return lambda item: item not in members
    return lambda item: item in members
|
||
def make_limited_filter(filter_list, positive):
    '''
    Build a stateful filter that matches each entry of filter_list a
    limited number of times.

    positive is forwarded to LimitedFilter, whose constructor requires it;
    omitting it raises TypeError.
    '''
    return LimitedFilter(filter_list, positive)
|
||
class LimitedFilter(object):
    '''
    Stateful filter that matches an element at most as many times as it
    occurs in filter_list.

    Each call that matches consumes one occurrence, so the answer for the
    same element can change between calls.

    filter_list -- iterable of hashable elements; duplicates are counted
    positive    -- if true, matching elements are accepted (True) and all
                   others rejected; if false, the answers are swapped
    '''

    def __init__(self, filter_list, positive):
        # Remaining number of matches allowed for each element.
        self.d = collections.Counter(filter_list)
        self.positive = positive

    def __contains__(self, elt):
        '''Return True while elt still has at least one match left.'''
        # Counter returns 0 for a missing key without inserting it.
        return self.d[elt] > 0

    def remove(self, elt):
        '''Consume one occurrence of elt; do nothing if none is left.'''
        remaining = self.d[elt] - 1
        if remaining > 0:
            self.d[elt] = remaining
        else:
            # Drop exhausted (or absent) entries so the counter never
            # holds zero or negative counts.
            self.d.pop(elt, None)

    def __call__(self, elt):
        '''Return whether elt passes the filter, consuming a match if used.'''
        if elt in self:
            self.remove(elt)
            # Same polarity as the unlimited filter: a positive filter
            # accepts elements that are found in the filter list.
            return self.positive
        return not self.positive
|
||
def filt(filt_file, filtered_file, positive, limited):
    '''
    Lazily yield the lines of filtered_file that pass the filter built
    from filt_file.

    filt_file     -- iterable of lines making up the filter
    filtered_file -- iterable of lines to be tested
    positive      -- forwarded to make_filter
    limited       -- forwarded to make_filter

    Returns a generator; filtered_file is consumed only as the result is
    iterated.
    '''
    filter_ = make_filter(filt_file, positive=positive, limited=limited)
    # A generator expression is as lazy as itertools.ifilter but also
    # exists outside Python 2.
    return (line for line in filtered_file if filter_(line))
|
||
def main():
    '''
    Command-line entry point: parse the options, then write every line of
    INPUT (standard input by default) that passes the FILTER file to
    standard output.
    '''
    # The text must be passed as description=; the first positional
    # parameter of ArgumentParser is the program name.
    parser = argparse.ArgumentParser(description='Filter lines matching a specified filter')
    parser.add_argument('-n', '--negative', dest='positive', default=False, action='store_false', help='only reject lines in the filter file (default)')
    parser.add_argument('-p', '--positive', dest='positive', action='store_true', help='only accept lines in the filter file')
    parser.add_argument('-u', '--unlimited', dest='limited', default=False, action='store_false', help='accept/reject lines every time they are encountered')
    parser.add_argument('-l', '--limited', dest='limited', action='store_true', help='accept/reject lines once for each time they are in the filter only (and then do the reject/accept afterwards)')
    # argparse.FileType opens the named file for reading and, unlike the
    # `file` builtin, is not limited to Python 2.
    parser.add_argument(metavar="FILTER", dest='filter', type=argparse.FileType('r'), help='an (unordered) list of lines to use as a filter')
    parser.add_argument(metavar="INPUT", dest='input', type=argparse.FileType('r'), nargs="?", default=sys.stdin, help='these lines are output if and only if they match the specified filter')
    options = parser.parse_args()
    for valid_line in filt(options.filter, options.input, positive=options.positive, limited=options.limited):
        # Lines keep their own newline, so write them out unchanged.
        sys.stdout.write(valid_line)
|
||
# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
#!/usr/bin/python2.7 | ||
# Multiset operations | ||
# Set operations | ||
import argparse | ||
import collections | ||
import sys | ||
from functools import partial, reduce | ||
import operator | ||
|
||
def binary_file_op(string, file1, file2):
    '''
    Run an external command on two files and capture its standard output.

    string -- the command to run; it is split with shell-like quoting
              rules and the two file names are appended as arguments
    file1, file2 -- objects whose .name attribute is a file path

    Returns an open NamedTemporaryFile, positioned at the start, holding
    whatever the command wrote to standard output. The command's exit
    status is not checked.
    '''
    # Imported locally because the module header does not import them.
    import shlex
    import subprocess
    import tempfile

    f = tempfile.NamedTemporaryFile()
    command = shlex.split(string) + [file1.name, file2.name]
    # Redirect through stdout= instead of a ">" argument: no shell is
    # involved, so ">" would just be passed to the command as a word.
    subprocess.call(command, stdout=f)
    # The child wrote through the shared descriptor; rewind for readers.
    f.seek(0)
    return f
|
||
def set_op(op, files):
    '''
    Write to standard output every distinct line for which op holds.

    op    -- callable taking one boolean per file (whether the line is in
             that file) and returning a truth value
    files -- list of iterables of lines; may be empty

    Each selected line is written once, in set iteration order.
    '''
    # Real lists/sets rather than map(): the sets are iterated again for
    # every candidate line.
    sets = [set(f) for f in files]
    # union() with no arguments yields an empty set, so an empty `files`
    # list writes nothing instead of raising.
    poss = set().union(*sets)
    for line in poss:
        if op(*[line in s for s in sets]):
            # Lines keep their own newline, so write them out unchanged.
            sys.stdout.write(line)
|
||
def multiset_op(op, files):
    '''
    Write each distinct line to standard output op(counts...) times.

    op    -- callable taking one occurrence count per file and returning
             how many copies of the line to write; a result of zero or
             less writes nothing
    files -- list of iterables of lines; may be empty

    Lines are visited in set iteration order.
    '''
    bags = [collections.Counter(f) for f in files]
    # union() with no arguments yields an empty set, so an empty `files`
    # list writes nothing instead of raising.
    poss = set().union(*bags)
    for line in poss:
        # Counter returns 0 for lines a file does not contain.
        copies = op(*[bag[line] for bag in bags])
        for _ in range(copies):
            # Lines keep their own newline, so write them out unchanged.
            sys.stdout.write(line)
|
||
def binary_set_op(op, f1, f2):
    '''Two-file form of set_op: passes f1 and f2 as its list of files.'''
    pair = [f1, f2]
    return set_op(op, pair)
|
||
def binary_multiset_op(op, f1, f2):
    '''Two-file form of multiset_op: passes f1 and f2 as its list of files.'''
    pair = [f1, f2]
    return multiset_op(op, pair)
|
||
def main():
    '''
    Command-line entry point.

    Builds two subcommands, "set" and "ms" (multiset). Each requires
    exactly one operation flag plus two input files. The chosen flag stores
    a ready-to-call function in options.func, which is then applied to the
    two opened files.
    '''
    parser = argparse.ArgumentParser(description='Perform simple set operations with unordered files.')
    subparsers = parser.add_subparsers()
    # Set options
    set_parser = subparsers.add_parser("set", help='perform set operation ("set -h" for more help)')

    setops = set_parser.add_mutually_exclusive_group(required=True)
    # Each opfunc receives one boolean per file: "is the line present?"
    for opname, help_text, opfunc in [
        ('union', 'print all lines which appear in either file', operator.or_),
        ('intersection', 'print all lines which appear in both files', operator.and_),
        ('difference', 'print all lines which appear in the first file but not the second', lambda a, b: a and not b),
        ('symmetric-difference', 'print all lines which appear in exactly one of the two files', operator.xor),
    ]:
        setops.add_argument('--' + opname, dest='func', help=help_text,
                            action='store_const', const=partial(binary_set_op, opfunc))

    # argparse.FileType opens the named file for reading and, unlike the
    # `file` builtin, is not limited to Python 2.
    set_parser.add_argument('file1', type=argparse.FileType('r'))
    set_parser.add_argument('file2', type=argparse.FileType('r'))

    # Multiset options
    multiset_parser = subparsers.add_parser("ms", help='perform multiset operation ("ms -h" for more help)')

    msops = multiset_parser.add_mutually_exclusive_group(required=True)
    # Each opfunc receives one occurrence count per file and returns how
    # many copies of the line to print.
    for opname, help_text, opfunc in [
        ('union', 'print whichever count of lines appears more', max),
        ('intersection', 'print which count of lines appears less', min),
        ('difference', 'print <n> copies of a line which appear <m+n> times in the first file but only <m> times in the second; lines which appear more times in the second file are not printed', lambda a, b: max(a - b, 0)),
        ('sym_difference', 'print <n> copies of a lines which appears <m+n> times in one file and <m> times in the other', lambda a, b: abs(a - b)),
        ('add', 'combine the two files', operator.add),
    ]:
        msops.add_argument('--' + opname, dest='func', help=help_text,
                           action='store_const', const=partial(binary_multiset_op, opfunc))

    multiset_parser.add_argument('file1', type=argparse.FileType('r'))
    multiset_parser.add_argument('file2', type=argparse.FileType('r'))

    options = parser.parse_args()
    options.func(options.file1, options.file2)
|
||
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()