-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparse_rtf.py
executable file
·52 lines (42 loc) · 1.06 KB
/
parse_rtf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# Parse an RTF file and write the pretty-printed parse result to result.txt
from rtf_parser import rtf_parser
import time
import pprint
def strip_trailing_nul(data):
    """Return *data* without its last character when that character is NUL.

    At most one trailing NUL is removed.  Empty input is returned unchanged
    instead of raising ``IndexError``.
    """
    # endswith() is safe on an empty string, unlike indexing data[-1].
    if data.endswith('\x00'):
        return data[:-1]
    return data


def main(argv):
    """Parse the RTF file named on the command line and dump the result.

    The parse result is pretty-printed with ``pprint.pformat`` and written
    to ``result.txt`` in the current directory; progress and the parse time
    are printed to standard output.

    Args:
        argv: Command-line argument list; ``argv[1]`` is the input filename.

    Returns:
        Process exit status: 1 when no input filename was given, 0 otherwise.
    """
    # NOTE: every print below takes one pre-formatted string in parentheses,
    # so the output is the same under the Python 2 print statement.
    if len(argv) < 2:
        print("No input filename")
        return 1

    fn = argv[1]
    print("Input file = %s" % fn)

    # Read the whole file.  "rU" is Python 2's universal-newlines text mode;
    # the context manager closes the handle even if read() fails.
    with open(fn, "rU") as f:
        # The original author notes the RTF data ends with a NUL byte,
        # which is dropped before parsing.
        data = strip_trailing_nul(f.read())
    print("data length = %d" % len(data))

    print("parse...")
    # Only the parse call itself is timed.
    t1 = time.clock()
    res = rtf_parser.parse(data)
    t2 = time.clock()
    print("parse ok")

    print("formating result text...")
    res_text = pprint.pformat(res)

    # Closing the output file explicitly guarantees the text is flushed.
    with open("result.txt", "wt") as fo:
        fo.write(res_text + "\n")

    print("time = %f" % (t2 - t1))
    return 0


if __name__ == "__main__":
    import sys
    sys.exit(main(sys.argv))