Skip to content

Commit

Permalink
Add filter, setop
Browse files Browse the repository at this point in the history
  • Loading branch information
za3k committed Jun 1, 2015
1 parent 45c0529 commit c93ae69
Show file tree
Hide file tree
Showing 3 changed files with 142 additions and 0 deletions.
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ dzen-clock
---
Make a small clock at the bottom of the screen

filter
---
Usage: `cat <stream | filter --negative FILENAME` drops every input line that appears in FILENAME (a blacklist). There are also options for whitelisting and for controlling how repeated lines are handled.

google
---
Searches for something on google, opening the results page in the default browser.
Expand Down Expand Up @@ -106,6 +110,10 @@ retry
---
Retry a command 5 times or until it succeeds

setop
---
Command-line program for performing basic set operations on the lines of files. `comm` can do some of this on sorted files, but `setop` is a little stronger: it works on unordered files and also supports multiset operations.

timer
---
Kitchen timer for the command line
Expand Down
58 changes: 58 additions & 0 deletions filter
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#!/usr/bin/python2.7
import argparse
import sys
import collections
import itertools

def make_filter(filter_list, positive, limited=False):
    '''
    Build a predicate that decides, line by line, what passes the filter.

    positive is forwarded to the chosen builder to select whitelist or
    blacklist behaviour; limited picks make_limited_filter instead of
    make_unlimited_filter.
    Warning: If limited is True, the filter is NOT referentially transparent.
    '''
    builder = make_limited_filter if limited else make_unlimited_filter
    return builder(filter_list, positive)

def make_unlimited_filter(filter_list, positive):
    '''
    Return a stateless membership predicate over filter_list.

    With positive true the predicate is true for items present in
    filter_list; otherwise it is true for items absent from it.
    '''
    members = set(filter_list)

    def present(x):
        return x in members

    def absent(x):
        return x not in members

    return present if positive else absent

def make_limited_filter(filter_list, positive):
    '''
    Return a stateful LimitedFilter built from filter_list.

    filter_list -- iterable of lines; repeated entries are counted.
    positive    -- polarity handed to LimitedFilter.
    '''
    # LimitedFilter.__init__ requires the polarity; leaving it out made
    # every call raise TypeError.
    return LimitedFilter(filter_list, positive)

class LimitedFilter(object):
    '''
    Stateful line filter that honours each filter entry only as many times
    as it occurs in the filter list.

    Calling the instance with a line returns a truthy value when the line
    should be kept. While a line still has unused occurrences it is treated
    as "in the filter" (kept when positive, dropped when negative) and one
    occurrence is consumed; once they run out the opposite decision is
    returned. Because calls mutate the remaining counts, the same input can
    produce different answers over time.
    '''

    def __init__(self, filter_list, positive):
        # Remaining number of times each filter entry may still match.
        self.d = collections.Counter(filter_list)
        self.positive = positive

    def __contains__(self, elt):
        '''True while elt has at least one unused occurrence left.'''
        # Counter returns 0 for missing keys without inserting them.
        return self.d[elt] > 0

    def remove(self, elt):
        '''Consume one occurrence of elt; does nothing if none remain.'''
        remaining = self.d[elt] - 1
        if remaining > 0:
            self.d[elt] = remaining
        else:
            # Drop exhausted keys so the counter never stores zero counts.
            self.d.pop(elt, None)

    def __call__(self, elt):
        '''Return whether elt should be kept, consuming one occurrence if matched.'''
        if elt in self:
            self.remove(elt)
            # Matched the filter: keep for a whitelist, drop for a blacklist.
            return self.positive
        return not self.positive

def filt(filt_file, filtered_file, positive, limited):
    '''
    Lazily yield the lines of filtered_file that pass the filter.

    filt_file     -- iterable of lines making up the filter.
    filtered_file -- iterable of lines to be filtered.
    positive      -- passed to make_filter to choose accept/reject polarity.
    limited       -- passed to make_filter to choose the limited filter.

    Lines are compared without their trailing newline so that a final line
    lacking one still matches; the yielded lines are the original,
    unmodified input lines.
    '''
    filter_ = make_filter(
        (line.rstrip('\n') for line in filt_file),
        positive=positive,
        limited=limited,
    )
    # Generator keeps the filtering lazy, so large inputs are streamed.
    return (line for line in filtered_file if filter_(line.rstrip('\n')))

def main():
    '''
    Command-line entry point for the filter script.

    Parses the polarity (-n/-p) and repetition (-u/-l) options, opens the
    FILTER file and the optional INPUT file (standard input when omitted),
    and writes every input line that passes the filter to standard output.
    '''
    # The text must be passed as description=; the first positional
    # parameter of ArgumentParser is the program name.
    parser = argparse.ArgumentParser(description='Filter lines matching a specified filter')
    parser.add_argument('-n', '--negative', dest='positive', default=False, action='store_false',
                        help='only reject lines in the filter file (default)')
    parser.add_argument('-p', '--positive', dest='positive', action='store_true',
                        help='only accept lines in the filter file')
    parser.add_argument('-u', '--unlimited', dest='limited', default=False, action='store_false',
                        help='accept/reject lines every time they are encountered')
    parser.add_argument('-l', '--limited', dest='limited', action='store_true',
                        help='accept/reject lines once for each time they are in the filter only (and then do the reject/accept afterwards)')
    parser.add_argument('filter', metavar="FILTER", type=argparse.FileType('r'),
                        help='an (unordered) list of lines to use as a filter')
    parser.add_argument('input', metavar="INPUT", type=argparse.FileType('r'), nargs="?", default=sys.stdin,
                        help='these lines are output if and only if they match the specified filter')
    options = parser.parse_args()

    # Lines still carry their own line endings, so write them verbatim.
    sys.stdout.writelines(
        filt(options.filter, options.input, positive=options.positive, limited=options.limited)
    )

# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    main()
76 changes: 76 additions & 0 deletions setop
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#!/usr/bin/python2.7
# Multiset operations
# Set operations
import argparse
import collections
import sys
from functools import partial, reduce
import operator

def binary_file_op(string, file1, file2):
    '''
    Run an external two-file command and capture its standard output.

    string -- the command to run; it is split shell-style, so it may carry
              its own arguments.
    file1, file2 -- objects with a .name attribute; the names are appended
              as the last two command arguments.

    Returns an open NamedTemporaryFile, rewound to the start, holding the
    command's standard output. The command's exit status is ignored.
    '''
    # Imported locally: the module's import block does not provide these.
    import shlex
    import subprocess
    import tempfile

    f = tempfile.NamedTemporaryFile()
    command = shlex.split(string) + [file1.name, file2.name]
    # A literal ">" argument is not a redirection without a shell; hand the
    # temporary file to the child as its stdout instead.
    subprocess.call(command, stdout=f)
    # The child shares the file offset, so rewind before handing it back.
    f.seek(0)
    return f

def set_op(op, files):
    '''
    Print every distinct line selected by a boolean set operation.

    op    -- callable taking one boolean per file (is the line in that
             file?) and returning a truthy value when the line belongs in
             the result.
    files -- iterable of iterables of lines (e.g. open files).

    Lines are compared without their trailing newline, so a final line
    lacking one matches the same text elsewhere. Each selected line is
    written once, newline-terminated, in arbitrary (set) order.
    '''
    sets = [set(line.rstrip("\n") for line in f) for f in files]
    # Candidate lines: anything appearing in at least one file. union()
    # on an empty list of sets simply yields the empty set.
    poss = set().union(*sets)
    for line in poss:
        if op(*[line in members for members in sets]):
            sys.stdout.write(line + "\n")

def multiset_op(op, files):
    '''
    Print lines according to a multiset (bag) operation on their counts.

    op    -- callable taking one occurrence count per file and returning
             how many copies of the line to print.
    files -- iterable of iterables of lines (e.g. open files).

    Lines are compared without their trailing newline, so a final line
    lacking one is counted together with the same text elsewhere. Output
    lines are newline-terminated and grouped per distinct line, in
    arbitrary (set) order.
    '''
    bags = [collections.Counter(line.rstrip("\n") for line in f) for f in files]
    # Distinct lines across all files; empty when no files are given.
    poss = set().union(*bags)
    for line in poss:
        # Counter yields 0 for lines a file does not contain.
        copies = op(*[bag[line] for bag in bags])
        for _ in range(copies):
            sys.stdout.write(line + "\n")

def binary_set_op(op, f1, f2):
    '''Apply set_op to exactly two files, f1 and f2, in that order.'''
    operands = [f1, f2]
    return set_op(op, operands)

def binary_multiset_op(op, f1, f2):
    '''Apply multiset_op to exactly two files, f1 and f2, in that order.'''
    operands = [f1, f2]
    return multiset_op(op, operands)

def main():
    '''
    Command-line entry point for setop.

    Defines two subcommands, "set" and "ms" (multiset). Each takes exactly
    one operation flag plus two file arguments; the chosen flag stores a
    partially-applied binary_set_op / binary_multiset_op in options.func,
    which is then called with the two opened files.
    '''
    parser = argparse.ArgumentParser(description='Perform simple set operations with unordered files.')
    subparsers = parser.add_subparsers(dest='command')
    # Make a missing subcommand a usage error rather than an AttributeError
    # on options.func (subcommands are optional by default on Python 3).
    subparsers.required = True

    # Set options
    set_parser = subparsers.add_parser("set", help='perform set operation ("set -h" for more help)')

    setops = set_parser.add_mutually_exclusive_group(required=True)
    # Each opfunc receives one boolean per file: "is the line in that file?"
    set_operations = [
        ('union', 'print all lines which appear in either file', operator.or_),
        ('intersection', 'print all lines which appear in both files', operator.and_),
        ('difference', 'print all lines which appear in the first file but not the second', lambda a, b: a and not b),
        ('symmetric-difference', 'print all lines which appear in exactly one of the two files', operator.xor),
    ]
    for opname, description, opfunc in set_operations:
        setops.add_argument('--' + opname, dest='func', help=description,
                            action='store_const', const=partial(binary_set_op, opfunc))

    set_parser.add_argument('file1', type=argparse.FileType('r'))
    set_parser.add_argument('file2', type=argparse.FileType('r'))

    # Multiset options
    multiset_parser = subparsers.add_parser("ms", help='perform multiset operation ("ms -h" for more help)')

    msops = multiset_parser.add_mutually_exclusive_group(required=True)
    # Each opfunc receives one occurrence count per file and returns the
    # number of copies to print.
    multiset_operations = [
        ('union', 'print whichever count of lines appears more', max),
        ('intersection', 'print which count of lines appears less', min),
        ('difference', 'print <n> copies of a line which appear <m+n> times in the first file but only <m> times in the second; lines which appear more times in the second file are not printed', lambda a, b: max(a - b, 0)),
        # Must use the lambda's own parameters; the previous body referred
        # to undefined names and raised NameError when selected.
        ('sym_difference', 'print <n> copies of a lines which appears <m+n> times in one file and <m> times in the other', lambda a, b: abs(a - b)),
        ('add', 'combine the two files', operator.add),
    ]
    for opname, description, opfunc in multiset_operations:
        msops.add_argument('--' + opname, dest='func', help=description,
                           action='store_const', const=partial(binary_multiset_op, opfunc))

    multiset_parser.add_argument('file1', type=argparse.FileType('r'))
    multiset_parser.add_argument('file2', type=argparse.FileType('r'))

    options = parser.parse_args()
    options.func(options.file1, options.file2)

# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()

0 comments on commit c93ae69

Please sign in to comment.