-
Notifications
You must be signed in to change notification settings - Fork 0
/
mc2csv.py
executable file
·218 lines (184 loc) · 7.75 KB
/
mc2csv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
"""
Takes a Moodle XML export file of questions and exports all the
multiple choice questions into a csv file.
Usage: python3 mc2csv.py --in questions.xml
Optional arguments:
--delimiter char Delimiter character for the csv file, default is ";"
--keephtml Do not strip html tags at all.
--keeptags 'a,b,c' List of tags that should not be eliminated when
stripping html from the questions. The default
tags that are not purged: i,img,s,strong,sub,sub,u
--out filename.csv if not provided the base name of the xml file is
used, trailed by a .csv suffix.
"""
import xml.etree.ElementTree as ET
from html.parser import HTMLParser
import os, sys, csv
def dieNice(errMsg=""):
    """Print an error message with a short usage hint and terminate.

    The message goes to standard output and the process exits with
    status 1. The script name shown in the usage hint is the base name of
    ``sys.argv[0]``.
    """
    template = "Error: {0}\nUsage: {1} --in moodle_questions.xml\nType --help for more details."
    script_name = os.path.basename(sys.argv[0])
    print(template.format(errMsg, script_name))
    sys.exit(1)
class HTMLTagRemover(HTMLParser):
    """HTML parser that strips every tag except those listed in ``keeptags``.

    Text content is always collected. Start, end and self-closing tags are
    written back to the result only when their name is in ``keeptags``; all
    other tags are dropped while the text between them is kept.

    Typical use: ``feed()`` the markup, call ``close()`` so the parser
    flushes any text it is still buffering, then read ``get_text()``.
    """

    def __init__(self, keeptags):
        """Create a remover that preserves the tag names in ``keeptags``."""
        super().__init__()
        self.result = []
        self.keeptags = keeptags

    @staticmethod
    def _format_attrs(attrs):
        """Render ``attrs`` (a list of ``(name, value)`` pairs) as markup.

        Each attribute is prefixed by a single space. Attributes without a
        value (``value is None``) are written as a bare name; values are
        escaped so quotes inside them cannot break the generated markup.
        """
        # Local import: the module only imports HTMLParser from this package.
        from html import escape

        rendered = []
        for name, value in attrs:
            if value is None:
                rendered.append(' ' + name)
            else:
                rendered.append(' {}="{}"'.format(name, escape(value, quote=True)))
        return ''.join(rendered)

    def handle_data(self, data):
        """Collect text content unchanged."""
        self.result.append(data)

    def handle_starttag(self, tag, attrs):
        """Re-emit an opening tag with its attributes if it is whitelisted."""
        if tag in self.keeptags:
            self.result.append('<' + tag + self._format_attrs(attrs) + '>')

    def handle_endtag(self, tag):
        """Re-emit a closing tag if it is whitelisted."""
        if tag in self.keeptags:
            self.result.append('</' + tag + '>')

    def handle_startendtag(self, tag, attrs):
        """Re-emit a self-closing tag (``<img .../>``) if it is whitelisted."""
        if tag in self.keeptags:
            self.result.append('<' + tag + self._format_attrs(attrs) + '/>')

    def get_text(self):
        """Return everything collected so far as one string."""
        return ''.join(self.result)
class McHandler:
    """Turn Moodle ``<question type="multichoice">`` elements into CSV rows.

    ``cols`` is the list of column names (the CSV header). It is shared with
    the caller and grows whenever a question introduces a column that has not
    been seen before, e.g. a higher ``answer_N`` index.

    ``options`` must provide ``'keephtml'`` (leave HTML untouched when true)
    and ``'keeptags'`` (tag names preserved when HTML is stripped).
    """

    # (column name, element path below <question>, conversion) in output order.
    #   'raw'  -> element text as is
    #   'html' -> element text passed through formatHtml()
    #   'flag' -> '1' when the stripped text equals 'true', otherwise '0'
    _FIELDS = (
        ('name', 'name/text', 'raw'),
        ('questiontext', 'questiontext/text', 'html'),
        ('generalfeedback', 'generalfeedback/text', 'html'),
        ('defaultgrade', 'defaultgrade', 'raw'),
        ('penalty', 'penalty', 'raw'),
        ('hidden', 'hidden', 'raw'),
        ('idnumber', 'idnumber', 'raw'),
        ('single', 'single', 'flag'),
        ('shuffleanswers', 'shuffleanswers', 'flag'),
        ('answernumbering', 'answernumbering', 'raw'),
        ('showstandardinstruction', 'showstandardinstruction', 'raw'),
        ('correctfeedback', 'correctfeedback/text', 'html'),
        ('partiallycorrectfeedback', 'partiallycorrectfeedback/text', 'html'),
        ('incorrectfeedback', 'incorrectfeedback/text', 'html'),
    )

    def __init__(self, cols, options):
        self.cols = cols
        self.keeptags = options['keeptags']
        self.keephtml = options['keephtml']

    @staticmethod
    def _text(element, path):
        """Return the text of the child at ``path``, or '' if absent/empty.

        Elements that are missing from the export must not abort the
        conversion, so a missing element is treated like an empty one.
        """
        node = element.find(path)
        if node is None or node.text is None:
            return ''
        return node.text

    def insertColInRow(self, row, name, value):
        """Store ``value`` in ``row`` under column ``name``.

        The column is appended to the shared header when it is new, and
        ``row`` is padded with empty strings if it is shorter than the
        column's position, so the value always lands under its header.
        """
        if name in self.cols:
            position = self.cols.index(name)
        else:
            self.cols.append(name)
            position = len(self.cols) - 1
        if len(row) <= position:
            row.extend([''] * (position + 1 - len(row)))
        row[position] = value

    def formatHtml(self, value):
        """Return ``value`` with HTML removed according to the options.

        ``None`` becomes ''. With ``keephtml`` set the value is returned
        untouched; otherwise all tags except ``keeptags`` are stripped.
        """
        if value is None:
            return ''
        if self.keephtml:
            return value
        remover = HTMLTagRemover(self.keeptags)
        remover.feed(value)
        # close() flushes text the parser may still be holding back (for
        # example trailing text that ends in a bare '&...').
        remover.close()
        return remover.get_text()

    def handle(self, question):
        """Convert one ``<question>`` element into a row.

        Returns ``[]`` for anything that is not a multichoice question.
        Otherwise returns a list aligned with ``self.cols``; missing child
        elements and a missing ``fraction`` attribute yield empty strings.
        """
        if question.attrib.get('type') != 'multichoice':
            return []

        row = [''] * len(self.cols)

        for column, path, kind in self._FIELDS:
            text = self._text(question, path)
            if kind == 'html':
                text = self.formatHtml(text)
            elif kind == 'flag':
                text = '1' if text.strip() == 'true' else '0'
            self.insertColInRow(row, column, text)

        for number, answer in enumerate(question.findall('answer'), start=1):
            prefix = 'answer_' + str(number)
            self.insertColInRow(
                row, prefix, self.formatHtml(self._text(answer, 'text')))
            self.insertColInRow(
                row, prefix + '_fraction', answer.attrib.get('fraction', ''))
            self.insertColInRow(
                row, prefix + '_feedback',
                self.formatHtml(self._text(answer, 'feedback/text')))

        return row
class QuestionXml:
    """Read a Moodle XML question export and write its multichoice questions as CSV.

    Configure the public attributes, then call ``process()`` followed by
    ``writeCsv()``.
    """

    def __init__(self):
        self.infile = None       # path of the Moodle XML file to read
        self.outfile = None      # path of the CSV file; derived from infile if None
        self.delimiter = ';'     # CSV field delimiter
        # Tags kept when HTML is stripped. The original list named 'sub'
        # twice; the second entry is taken to be a typo for 'sup'.
        self.keeptags = ['i', 'img', 's', 'strong', 'sub', 'sup', 'u']
        self.keephtml = False    # True: do not strip HTML at all
        self.cols = []           # CSV header, filled while processing
        self.rows = []           # one list per exported question

    def process(self):
        """Parse ``self.infile`` and collect one row per multichoice question.

        Exits via ``dieNice`` when no input file is set, or when the file
        cannot be read or parsed.
        """
        if self.infile is None:
            dieNice('No Moodle XML file provided')

        try:
            tree = ET.parse(self.infile)
        except FileNotFoundError:
            dieNice('File {0} not found'.format(self.infile))
        except ET.ParseError:
            dieNice('Error parsing XML in file ' + self.infile)
        except OSError as err:
            # e.g. permission denied or the path is a directory
            dieNice('Could not read file {0}: {1}'.format(self.infile, err))

        root = tree.getroot()
        handler = McHandler(
            self.cols, {'keephtml': self.keephtml, 'keeptags': self.keeptags})

        for question in root.findall('question'):
            row = handler.handle(question)
            # handle() returns [] for non-multichoice questions.
            if row:
                self.rows.append(row)

    def writeCsv(self):
        """Write the header and all collected rows to ``self.outfile``.

        When no output file was set, the name is the input file name with
        its extension replaced by ``.csv``. Rows that are shorter than the
        header (created before later columns were discovered) are padded
        with empty fields so every record has the same width.
        """
        if self.outfile is None:
            self.outfile = os.path.splitext(self.infile)[0] + '.csv'

        try:
            # newline='' is required by the csv module so it controls line
            # endings itself; utf-8 keeps non-ASCII question text writable
            # regardless of the platform's default encoding.
            fp = open(self.outfile, 'w', newline='', encoding='utf-8')
        except OSError:
            dieNice('Could not open file %s for writing result' % self.outfile)

        with fp:
            writer = csv.writer(fp, delimiter=self.delimiter)
            writer.writerow(self.cols)
            width = len(self.cols)
            for row in self.rows:
                writer.writerow(list(row) + [''] * (width - len(row)))
def main():
    """Evaluate the command line, run the conversion and write the CSV file.

    Arguments are read as ``--option value`` pairs; ``--keephtml`` and
    ``--help`` take no value. Any problem (unknown option, option without
    a value, stray value, invalid delimiter) ends the program via
    ``dieNice``.
    """
    # available options that can be changed via the command line
    options = ['in', 'out', 'delimiter', 'keephtml', 'keeptags', 'help']

    # the worker that does the parsing.
    worker = QuestionXml()

    # Name of the option that is still waiting for its value ('' if none).
    currentCmd = ''
    for arg in sys.argv[1:]:
        if arg[0:2] == '--':
            if currentCmd:
                # The previous option never received its value.
                dieNice('Missing value for argument --' + currentCmd)
            currentCmd = arg[2:]
            if currentCmd not in options:
                dieNice('Invalid argument {0}'.format(currentCmd))
            if currentCmd == 'help':
                print(__doc__)
                sys.exit(0)
            elif currentCmd == 'keephtml':
                worker.keephtml = True
                currentCmd = ''
        elif currentCmd:
            if currentCmd == 'in':
                worker.infile = arg
            elif currentCmd == 'out':
                worker.outfile = arg
            elif currentCmd == 'delimiter':
                # csv.writer needs exactly one character.
                if len(arg) != 1:
                    dieNice('Delimiter must be one character only')
                worker.delimiter = arg
            elif currentCmd == 'keeptags':
                # HTMLParser reports tag names in lower case, so normalise
                # the user's list and drop empty entries.
                worker.keeptags = [
                    tag.strip().lower() for tag in arg.split(',') if tag.strip()
                ]
            currentCmd = ''
        else:
            dieNice('invalid argument: ' + arg)

    if currentCmd:
        dieNice('Missing value for argument --' + currentCmd)

    # process the data now
    worker.process()
    worker.writeCsv()


# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()