-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathmain.py
47 lines (36 loc) · 965 Bytes
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
"""
RegExTractor
------------
Python regex generator.
Takes 2 or more strings (or even a single one) and generates a RegEx that
matches similar strings.
The generated RegEx always matches the original strings, but it also
generalizes, usually matching more.
"""
from subseq_tree import gen_tree, tree_to_regex, tree_to_HTML
def extract(strs):
    """Generate a RegEx from a list of example strings (the main function).

    The strings are first turned into a tree by ``gen_tree``; that tree is
    pretty-printed with ``pprint`` and then handed to ``tree_to_regex``,
    whose result is returned.
    """
    import pprint

    node_tree = gen_tree(strs)
    # Dump the intermediate tree before converting it to a RegEx.
    pprint.PrettyPrinter().pprint(node_tree)
    return tree_to_regex(node_tree)
def extract_HTML(strs):
    """Build the tree for ``strs`` with ``gen_tree`` and return the result
    of passing it to ``tree_to_HTML``.
    """
    return tree_to_HTML(gen_tree(strs))
if __name__ == '__main__':
    # Demo: run extract() on a few groups of sample strings and print each
    # generated RegEx followed by a blank line.
    demo_inputs = [
        ['abc$1250', 'xby#340', 'sbs@00000'],
        ['skull', 'school'],
        ['<div></div>', '<span></span>'],
    ]
    for sample_strs in demo_inputs:
        # A single parenthesised argument parses the same way as a
        # Python 2 print statement and as a Python 3 function call.
        print(extract(sample_strs))
        # print('') emits an empty line on both versions; a bare print()
        # would output "()" under Python 2.
        print('')