This repository has been archived by the owner on Oct 8, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2
/
utils.py
114 lines (83 loc) · 2.56 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import sys
import markdown
import HTMLParser
import config
def read_markdown_from_file(path):
    '''
    Return the entire content of the file at `path` as a string.

    The file is opened in text mode and closed as soon as it has been
    read, even if reading raises.
    '''
    with open(path, 'r') as md_file:
        return md_file.read()
def md2html(md):
    '''
    Render Markdown source `md` to HTML and return it encoded as UTF-8.
    '''
    rendered = markdown.markdown(md)
    return rendered.encode("utf-8")
def html2enml(html):
    '''
    Filter `html` through EnmlParser, using the tag and attribute lists
    from `config`, and return the resulting ENML document.
    '''
    tag_filter = EnmlParser(config.enml_legal_tag, config.enml_ilegal_attr)
    tag_filter.feed(html)
    enml = tag_filter.get_clean_text()
    return enml
class EnmlParser(HTMLParser.HTMLParser):
    '''
    ENML tags & attributes filter.

    Feed it HTML; it keeps only the tags listed in `legal_tag`, strips
    the attributes listed in `ilegal_attr` as well as every `on*` event
    handler attribute, and collects the result in `clean_text`.
    Text directly inside a tag that is not legal is dropped.
    All emitted text and attribute values are XML-escaped.
    '''

    # HTML elements that never have content or an end tag. They are not
    # tracked as "open" so an unclosed <br> or <img> cannot hide the tag
    # that really encloses the following text.
    _VOID_TAGS = ('area', 'base', 'br', 'col', 'embed', 'hr', 'img',
                  'input', 'link', 'meta', 'param', 'source', 'track', 'wbr')

    def __init__(self, legal_tag, ilegal_attr):
        '''
        legal_tag   -- sequence of tag names that may appear in the output.
        ilegal_attr -- collection of attribute names that must be removed.
        '''
        HTMLParser.HTMLParser.__init__(self)
        self.legal_tag = legal_tag
        self.illegal_attr = ilegal_attr
        self.clean_text = []
        # Tag that directly encloses the text being parsed (None at top level).
        self.pre_tag = None
        # Currently open (non-void) tags, innermost last.
        self._open_tags = []

    def _escape(self, text, quote=False):
        '''
        XML-escape `text` (&, <, >); also escape double quotes when
        `quote` is true so the result can sit inside a "..." attribute.
        '''
        from xml.sax.saxutils import escape
        if quote:
            return escape(text, {'"': '&quot;'})
        return escape(text)

    def handle_starttag(self, tag, attrs):
        if tag == 'body':
            # make the pre_tag be a legal tag on purpose, so that text
            # placed directly inside <body> is kept
            self.pre_tag = self.legal_tag[0]
            return
        if tag in self.legal_tag:
            self.clean_text.append('<')
            self.clean_text.append(tag)
            for name, val in attrs:
                if name in self.illegal_attr or name.startswith('on'):
                    continue
                if val is None:
                    # Valueless attribute (<td nowrap>): XML requires a
                    # value, so use the XHTML form name="name".
                    val = name
                self.clean_text.append(
                    ' %s="%s"' % (name, self._escape(val, quote=True)))
            self.clean_text.append('>')
        if tag in self._VOID_TAGS:
            # Text after a void tag still belongs to the enclosing tag.
            return
        self._open_tags.append(tag)
        self.pre_tag = tag

    def handle_data(self, data):
        if self.pre_tag in self.legal_tag:
            self.clean_text.append(self._escape(data))

    def handle_entityref(self, name):
        '''
        Keep a named entity (&name;) found inside a legal tag.

        Known HTML entities are written as numeric character references,
        which are valid XML without relying on the DTD; unknown names are
        written back as escaped literal text.
        '''
        if self.pre_tag not in self.legal_tag:
            return
        try:
            from htmlentitydefs import name2codepoint   # Python 2
        except ImportError:
            from html.entities import name2codepoint    # Python 3
        codepoint = name2codepoint.get(name)
        if codepoint is None:
            self.clean_text.append('&amp;%s;' % name)
        else:
            self.clean_text.append('&#%d;' % codepoint)

    def handle_charref(self, name):
        '''
        Keep a numeric character reference (&#NNN; / &#xHHH;) found
        inside a legal tag.
        '''
        if self.pre_tag not in self.legal_tag:
            return
        if name[:1] in ('x', 'X'):
            # XML only accepts a lowercase 'x' in hexadecimal references.
            self.clean_text.append('&#x%s;' % name[1:])
        else:
            self.clean_text.append('&#%s;' % name)

    def handle_endtag(self, tag):
        if tag in self.legal_tag:
            self.clean_text.append('</{0}>'.format(tag))
        if tag in self._open_tags:
            # Close this tag and anything left unclosed inside it.
            while self._open_tags.pop() != tag:
                pass
        # Fall back to the enclosing tag so that text following a nested
        # element (the " c" in <p>a <em>b</em> c</p>) is not lost.
        if self._open_tags:
            self.pre_tag = self._open_tags[-1]
        else:
            self.pre_tag = None

    def get_clean_text(self):
        '''
        Return everything collected so far, wrapped as an ENML document.
        '''
        return self.wrapENML(''.join(self.clean_text))

    def handle_media(self):
        '''
        Do something specially for the media resource(tag and attributes) in html.
        But actually I found that these tags without any extra handling work perfectly in evernote!
        notice -> there are 3 protocols that enml supporting : http, https, file
        '''
        pass

    def wrapENML(self, clean_text):
        '''
        Prepend the XML declaration and ENML doctype to `clean_text` and
        wrap it in an <en-note> element.
        '''
        header = '<?xml version="1.0" encoding="UTF-8"?>\n'
        header += '<!DOCTYPE en-note SYSTEM "http://xml.evernote.com/pub/enml2.dtd">\n'
        return '%s\n<en-note>%s</en-note>' % (header, clean_text)
class Struct:
    '''
    Simple attribute bag: every keyword argument becomes an instance
    attribute of the same name.
    '''

    def __init__(self, **entries):
        vars(self).update(entries)
def get_default_notetitle(path):
    '''
    Derive a note title from a file path: the last '/'-separated
    component, cut at its first '.'.

    Only '/' is treated as a path separator (*nix style paths).
    '''
    filename = path.rpartition('/')[2]
    title = filename.partition('.')[0]
    return title
def get_user_credentials():
    '''
    Prompts the user for a username and password.

    The password is read with getpass so that it is not echoed to the
    terminal. If the user interrupts a prompt (e.g. Ctrl-C) the process
    exits.

    Returns a (login, password) tuple.
    '''
    # Local import so this function does not depend on a new file-level import.
    import getpass

    try:
        login = raw_input("Evernote Login: ")
        password = getpass.getpass("Evernote Password: ")
    except (KeyboardInterrupt, SystemExit):
        sys.exit()
    return login, password
def import_evernote_lib():
    '''
    Make the bundled './lib' directory importable by appending it to
    sys.path, unless it is already listed there.
    '''
    bundled_dir = './lib'
    if bundled_dir in sys.path:
        return
    sys.path.append(bundled_dir)