-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcnxmlplus2html
executable file
·228 lines (202 loc) · 8.47 KB
/
cnxmlplus2html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
#!/usr/bin/env python
from lxml import etree
import sys
import os
import argparse
import copy
import inspect
from XmlValidator import XmlValidator
from XmlValidator import utils
from xml.sax.saxutils import unescape, escape
def escape_code(document):
    '''Return document with the contents of code and math environments
    XML-escaped.

    For each delimiter pair listed in ``envs`` the text between an opening
    delimiter and the next closing delimiter has its ``&``, ``<`` and ``>``
    characters replaced by the corresponding entities.  The delimiters
    themselves and the text outside the environments are left untouched.

    NOTE: the environments are processed one after the other, so text that
    sits inside two environments at once (e.g. ``\\(`` inside ``<code>``) is
    escaped twice.

    document -- the document as a string.

    Returns the document string with the environment contents escaped.
    '''
    envs = [('<code>', r'</code>'),
            (r'\(', r'\)'),
            (r'\[', r'\]'),
            (r'\begin{align*}', r'\end{align*}')]

    for start, end in envs:
        pieces = document.split(start)
        # pieces[0] is the text in front of the first opening delimiter, so
        # it is never inside an environment and must not be escaped.
        for i, piece in enumerate(pieces[1:], 1):
            end_pos = piece.find(end)
            # -1: no closing delimiter; 0: empty environment.  Anything
            # from one character upwards has to be escaped.
            if end_pos < 1:
                continue
            # escape the > < and &
            pieces[i] = escape(piece[:end_pos]) + piece[end_pos:]
        document = start.join(pieces)

    return document
if __name__ == "__main__":
    # Directory of this script, and directory of the file that defines
    # XmlValidator (the specification file is looked up in the latter).
    MY_PATH = os.path.realpath(os.path.dirname(__file__))
    SPEC_PATH = os.path.dirname(inspect.getfile(XmlValidator))

    # Parse command line arguments
    argumentParser = argparse.ArgumentParser(
        description='Convert a CNXML+ document to a HTML document.')
    argumentParser.add_argument(
        '--spec', dest='specFilename',
        default="spec.xml",
        help='Filename of the XML specification document.')
    argumentParser.add_argument(
        '--audience', dest='audience',
        default="learner",
        help='Target audience of the transform ( learner | teacher | correct ).')
    argumentParser.add_argument(
        '-o', dest='outputFilename',
        help='Write output to given filename rather than stdout.')
    argumentParser.add_argument(
        'filename', nargs='+',
        help='One or more filenames to process.')
    commandlineArguments = argumentParser.parse_args()

    # Output goes to stdout unless -o was given.
    if commandlineArguments.outputFilename is None:
        outputFile = sys.stdout
    else:
        outputFile = open(commandlineArguments.outputFilename, 'wt')

    # Read the specification through a context manager so the file handle
    # is closed as soon as its contents have been handed to the validator.
    specPath = os.path.join(SPEC_PATH, commandlineArguments.specFilename)
    with open(specPath, 'rt') as specFile:
        validator = XmlValidator(specFile.read())

    mathml_transform = utils.MmlTex()
    conversionFunctions = {}  # Cache: spec entry -> compiled conversion function
def cache_conversion_function(iSpec):
    '''Return the conversion function for a spec entry, building it on
    first use.

    iSpec -- either a spec entry node, or a string that is compared with the
             text of the <xpath> child of each child of validator.spec.

    The function is built from the text of a <conversion-callback> element
    below the entry whose name contains "html" and the selected audience.
    Callbacks whose name contains "html5" are only considered when no other
    one matches.  A missing or empty callback yields a function that
    returns None.  Built functions are stored in conversionFunctions, keyed
    by spec entry.

    Raises ValueError when no spec entry can be determined for iSpec.
    '''
    if isinstance(iSpec, basestring):
        # xpath given rather than node
        specEntry = None
        for child in validator.spec:
            xpathNode = child.find('xpath')
            if xpathNode is not None and xpathNode.text == iSpec:
                specEntry = child
                break
    else:
        specEntry = iSpec

    if specEntry is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError('No spec entry found for %r' % (iSpec,))

    conversionFunction = conversionFunctions.get(specEntry)
    if conversionFunction is not None:
        return conversionFunction

    # Cache conversion function
    audience = commandlineArguments.audience
    callbackNodes = specEntry.xpath(
        './/conversion-callback[contains(@name, "html") and contains(@name, "%s") and not(contains(@name, "html5"))]' % audience)
    if callbackNodes:
        if len(callbackNodes) != 1:
            utils.error_message(
                'More than 1 conversion entry for ' + specEntry.find('xpath').text)
    else:
        # Fall back to any html callback for this audience, html5 included.
        callbackNodes = specEntry.xpath(
            './/conversion-callback[contains(@name, "html") and contains(@name, "%s")]' % audience)
        if not callbackNodes:
            utils.warning_message(
                'No conversion entry for ' + specEntry.find('xpath').text)

    # A callback element without any text has .text == None; treat it the
    # same way as an empty or missing callback.
    callbackBody = ''
    if callbackNodes:
        callbackBody = (callbackNodes[0].text or '').strip()

    if callbackBody == '':
        conversionFunctionSource = 'conversionFunction = lambda self: None'
    else:
        # Wrap the callback text in a function definition; every line is
        # indented with one tab to form the function body.
        conversionFunctionSource = 'def conversionFunction(self):\n' + '\n'.join(
            ['\t' + line for line in callbackBody.split('\n')]) + '\n'

    # Names that the callback code can refer to.
    localVars = {
        'copy': copy,
        'os': os,
        'etree': etree,
        'utils': utils,
        'convert_using': convert_using,
        'warning_message': utils.warning_message,
        'error_message': utils.error_message,
        'mathml_transform': mathml_transform,
        'escape_latex': utils.escape_latex,
        'latex_math_function_check': utils.latex_math_function_check,
    }
    # NOTE(review): this executes code taken verbatim from the specification
    # file, so only trusted specification files must be used.
    exec(conversionFunctionSource, localVars)
    conversionFunction = localVars['conversionFunction']
    conversionFunctions[specEntry] = conversionFunction
    return conversionFunction
def convert_using(iNode, iPath):
    '''Convert iNode with the conversion function obtained for iPath and
    return that function's result.'''
    return cache_conversion_function(iPath)(iNode)
def convert_image(iSourceFilename, iDestinationFilename):
    '''Run the external ``convert`` command with the source and destination
    filenames as arguments and wait until it has finished.

    The exit status of the command is not inspected.
    '''
    import subprocess
    command = ['convert', iSourceFilename, iDestinationFilename]
    subprocess.Popen(command).wait()
def traverse(iNode, iValidator):
    '''Convert iNode and everything below it, children first.

    iNode      -- the element to convert.
    iValidator -- validator object; its documentSpecEntries mapping gives
                  the spec entry for each node and its spec is used for
                  error reporting.

    For a node that has a parent, the conversion result replaces the node
    inside the parent and None is returned.  For a node without a parent
    the conversion result is returned instead.

    If the conversion function raises, the error is reported on stderr and
    the program exits with status 1.
    '''
    # Work on a snapshot of the children: converting a child replaces it
    # inside iNode while we are still iterating.
    for child in list(iNode):
        traverse(child, iValidator)

    # Get associated conversion function
    specEntry = iValidator.documentSpecEntries.get(iNode)
    if specEntry is None:
        utils.error_message(
            'Unhandled element at ' + utils.get_full_dom_path(iNode, iValidator.spec))
    conversionFunction = cache_conversion_function(specEntry)

    parent = iNode.getparent()
    try:
        converted = conversionFunction(iNode)
    except Exception as error:
        # The root element has no parent; looking up parent.tag blindly
        # would raise here and hide the original error.
        parentTag = parent.tag if parent is not None else None
        # Report on stderr so the message cannot end up in the document
        # that is written to stdout.
        sys.stderr.write(
            'Error: %s %s\nNode: %s\n Parent: %s\n line: %s\n' % (
                error, type(error), iNode.tag, parentTag, iNode.sourceline))
        sys.exit(1)

    if converted is None:
        # Nothing to put in place of the node.
        return None

    if isinstance(converted, basestring):
        if parent is None:
            return converted
        # Carry the text in a placeholder element so that it can be spliced
        # into the parent in place of the node.
        dummyNode = etree.Element('dummy')
        dummyNode.text = unescape(converted)
        utils.etree_replace_with_node_list(parent, iNode, dummyNode)
        return None

    if parent is None:
        # NOTE(review): unescape is applied to this non-string result but
        # not to the string result returned above -- confirm this is the
        # intended way round.
        return unescape(converted)

    utils.etree_replace_with_node_list(parent, iNode, converted)
    return None
# Convert every input file named on the command line; only when the file is
# executed as a script.
if __name__ == "__main__":
    for filename in commandlineArguments.filename:
        if filename == '-':
            # '-' selects standard input.
            validator.validate(
                sys.stdin.read(),
                iCleanUp=True)
        else:
            # Close each input file as soon as it has been read.
            with open(filename, 'rt') as fp:
                validator.validate(
                    fp.read(),
                    iCleanUp=True)
        document = validator.dom

        # The page title is the filename without the .cnxmlplus suffix and
        # with dashes turned into spaces.
        # capitalise first letter (not implemented)
        Title = filename.replace('.cnxmlplus', '').replace('-', ' ')

        outputdoc = traverse(document, validator).encode('utf-8')

        htmloutputdoc = '''<!DOCTYPE html>
<html>
<head>
<title>{Title}</title>
</head>
<body>
{Content}
</body>
</html>'''.format(Content=outputdoc, Title=Title)

        htmloutputdoc = escape_code(htmloutputdoc)
        # Write to the selected destination so that -o is honoured; the
        # trailing newline matches what print() produced before.
        outputFile.write(htmloutputdoc + '\n')

    # Flush and release the -o file; stdout is left open.
    if outputFile is not sys.stdout:
        outputFile.close()
# outputFile.flush()
# clean up
# for f in ["figure-autopp.cb",
# "figure.aux",
# "figure.cb",
# "figure.cb2",
# "figure.epsi",
# "figure.log",
# "figure.pdf",
# "figure-pics.pdf",
# "figure.png",
# "figure.ps",
# "figure.tex"]:
# if os.path.exists(f):
# os.remove(f)