-
Notifications
You must be signed in to change notification settings - Fork 2
/
generate_book.py
325 lines (272 loc) · 14.4 KB
/
generate_book.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
from subprocess import check_call
import os
import os.path as op
import sys
import shutil as sh
import yaml
import nbformat as nbf
from nbclean import NotebookCleaner
from tqdm import tqdm
import numpy as np
from glob import glob
from uuid import uuid4
import argparse
DESCRIPTION = (
    "Convert a collection of Jupyter Notebooks into Jekyll "
    "markdown suitable for a course textbook."
)

# The helper utilities are imported from the `scripts/` folder that sits next
# to this file, so that folder has to be on `sys.path` before the import runs.
this_folder = op.dirname(op.abspath(__file__))
sys.path.append(op.join(this_folder, 'scripts'))
from jupyterbook.utils import (
    _split_yaml,
    _check_url_page,
    _prepare_toc,
    _prepare_url,
    _clean_notebook_cells,
    _error,
)

# Command-line interface
parser = argparse.ArgumentParser(description=DESCRIPTION)

# Path options. A value of None means "not given"; the `__main__` section
# substitutes a location derived from the site root in that case.
parser.add_argument(
    "--site-root",
    default=None,
    help="Path to the root of the textbook repository.",
)
parser.add_argument(
    "--path-template",
    default=None,
    help="Path to the template nbconvert uses to build markdown files",
)
parser.add_argument(
    "--path-config",
    default=None,
    help="Path to the Jekyll configuration file",
)
parser.add_argument(
    "--path-toc",
    default=None,
    help="Path to the Table of Contents YAML file",
)

# Boolean switches
parser.add_argument(
    "--overwrite",
    action='store_true',
    help="Overwrite md files if they already exist.",
)
parser.add_argument(
    "--execute",
    action='store_true',
    help="Execute notebooks before converting to MD.",
)
parser.add_argument(
    "--local-build",
    action='store_true',
    help="Specify you are building site locally for later upload.",
)
parser.set_defaults(overwrite=False, execute=False)

# Defaults
# Name of the folder (under the site root) that receives the generated pages
BUILD_FOLDER_NAME = "_build"
# Source file types that are treated as book pages
SUPPORTED_FILE_SUFFIXES = ['.ipynb', '.md']
def _clean_lines(lines, filepath):
"""Replace images with jekyll image root and add escape chars as needed."""
inline_replace_chars = ['#']
# Images: replace absolute nbconvert image paths to baseurl paths
path_rel_root = op.relpath(PATH_SITE_ROOT, op.dirname(filepath))
path_rel_root_one_up = path_rel_root.replace('../', '', 1)
for ii, line in enumerate(lines):
# Handle relative paths because we remove `content/` from the URL
# If there's a path that goes back to the root, remove a level`
# This is for images referenced directly in the markdown
if path_rel_root in line:
line = line.replace(path_rel_root, path_rel_root_one_up)
# For programmatically-generated images from notebooks, replace the abspath with relpath
line = line.replace(PATH_IMAGES_FOLDER, op.relpath(PATH_IMAGES_FOLDER, op.dirname(filepath)))
# Adding escape slashes since Jekyll removes them when it serves the page
# Make sure we have at least two dollar signs and they
# Aren't right next to each other
dollars = np.where(['$' == char for char in line])[0]
if len(dollars) > 2 and all(ii > 1 for ii in (dollars[1:] - dollars[:1])):
for char in inline_replace_chars:
line = line.replace('\\{}'.format(char), '\\\\{}'.format(char))
line = line.replace(' \\$', ' \\\\$')
lines[ii] = line
return lines
def _copy_non_content_files():
"""Copy non-markdown/notebook files in the content folder into build folder so relative links work."""
all_files = glob(op.join(PATH_CONTENT_FOLDER, '**', '*'), recursive=True)
non_content_files = [ii for ii in all_files if not any(ii.endswith(ext) for ext in SUPPORTED_FILE_SUFFIXES)]
for ifile in non_content_files:
if op.isdir(ifile):
continue
# The folder name may change if the permalink sanitizing changes it.
# this ensures that a new folder exists if needed
new_path = ifile.replace(os.sep + CONTENT_FOLDER_NAME, os.sep + BUILD_FOLDER_NAME)
if not op.isdir(op.dirname(new_path)):
os.makedirs(op.dirname(new_path))
sh.copy2(ifile, new_path)
def _case_sensitive_fs(path):
"""True when filesystem at `path` is case sensitive, False otherwise.
Checks this by attempting to write two files, one w/ upper case, one
with lower. If after this only one file exists, the system is case-insensitive.
Makes directory `path` if it does not exist.
"""
if not op.exists(path):
os.makedirs(path)
root = op.join(path, uuid4().hex)
fnames = [root + suffix for suffix in 'aA']
try:
for fname in fnames:
with open(fname, 'wt') as fobj:
fobj.write('text')
written = glob(root + '*')
finally:
for fname in written:
os.unlink(fname)
return len(written) == 2
if __name__ == '__main__':
    # Script entry point. In order: resolve paths from the CLI arguments, read
    # the Jekyll config and the table of contents, convert every listed page
    # (notebook or markdown) into a Jekyll markdown file in the build folder
    # with generated front-matter, then copy the remaining content files.
    #
    # The upper-case names assigned here are module globals on purpose: the
    # helper functions defined above read them.

    ###############################################################################
    # Default values and arguments

    args = parser.parse_args()
    overwrite = bool(args.overwrite)
    execute = bool(args.execute)

    if args.site_root is None:
        # Fall back to the parent of the folder that contains this script
        args.site_root = op.join(op.dirname(op.abspath(__file__)), '..')

    # Paths for our notebooks
    PATH_SITE_ROOT = op.abspath(args.site_root)
    PATH_TOC_YAML = args.path_toc if args.path_toc is not None else op.join(PATH_SITE_ROOT, '_data', 'toc.yml')
    CONFIG_FILE = args.path_config if args.path_config is not None else op.join(PATH_SITE_ROOT, '_config.yml')
    PATH_TEMPLATE = args.path_template if args.path_template is not None else op.join(PATH_SITE_ROOT, 'scripts', 'templates', 'jekyllmd.tpl')
    PATH_IMAGES_FOLDER = op.join(PATH_SITE_ROOT, '_build', 'images')
    BUILD_FOLDER = op.join(PATH_SITE_ROOT, BUILD_FOLDER_NAME)

    ###############################################################################
    # Read in textbook configuration

    # Load the yaml for this site.
    # `safe_load` is used because `yaml.load` without an explicit Loader is
    # rejected by current PyYAML releases and can construct arbitrary objects.
    with open(CONFIG_FILE, 'r') as ff:
        site_yaml = yaml.safe_load(ff)
    CONTENT_FOLDER_NAME = site_yaml.get('content_folder_name').strip('/')
    PATH_CONTENT_FOLDER = op.join(PATH_SITE_ROOT, CONTENT_FOLDER_NAME)

    # Load the textbook yaml for this site
    if not op.exists(PATH_TOC_YAML):
        # Raised like the other `_error` uses in this script; without `raise`
        # the message was dropped and the `open` below failed instead.
        raise _error("No toc.yml file found, please create one")
    with open(PATH_TOC_YAML, 'r') as ff:
        toc = yaml.safe_load(ff)

    # Drop divider items and non-linked pages in the sidebar, un-nest sections
    toc = _prepare_toc(toc)

    ###############################################################################
    # Generating the Jekyll files for all content

    n_skipped_files = 0
    n_built_files = 0

    # The redirect clash checked further down only exists on a
    # case-INsensitive filesystem, so the check is enabled when the probe
    # reports that the FS is *not* case sensitive. The probe is always run
    # because it also creates the build folder.
    fs_is_case_sensitive = _case_sensitive_fs(BUILD_FOLDER)
    case_check = bool(args.local_build) and not fs_is_case_sensitive

    print("Convert and copy notebook/md files...")
    for ix_file, page in enumerate(tqdm(list(toc))):
        url_page = page.get('url', None)
        title = page.get('title', None)
        if page.get('external', None):
            # If its an external link, just pass
            continue

        # Make sure URLs (file paths) have correct structure
        _check_url_page(url_page, CONTENT_FOLDER_NAME)

        ###############################################################################
        # Create path to old/new file and create directory

        # URL will be relative to the CONTENT_FOLDER
        path_url_page = os.path.join(PATH_CONTENT_FOLDER, url_page.lstrip('/'))
        path_url_folder = os.path.dirname(path_url_page)

        # URLs shouldn't have the suffix in there already so now we find which one to add
        for suf in SUPPORTED_FILE_SUFFIXES:
            if op.exists(path_url_page + suf):
                path_url_page = path_url_page + suf
                break

        if not op.exists(path_url_page):
            raise _error("Could not find file called {} with any of these extensions: {}".format(path_url_page, SUPPORTED_FILE_SUFFIXES))

        # Create and check new folder / file paths
        path_new_folder = path_url_folder.replace(os.sep + CONTENT_FOLDER_NAME, os.sep + BUILD_FOLDER_NAME)
        path_new_file = op.join(path_new_folder, op.basename(path_url_page).replace('.ipynb', '.md'))

        # Skip pages whose built file is newer than the source, unless asked
        # to overwrite
        if overwrite is False and op.exists(path_new_file) \
           and os.stat(path_new_file).st_mtime > os.stat(path_url_page).st_mtime:
            n_skipped_files += 1
            continue

        if not op.isdir(path_new_folder):
            os.makedirs(path_new_folder)

        ###############################################################################
        # Generate previous/next page URLs

        if ix_file == 0:
            url_prev_page = ''
            prev_file_title = ''
        else:
            prev_file_title = toc[ix_file-1].get('title')
            url_prev_page = toc[ix_file-1].get('url')
            url_prev_page = _prepare_url(url_prev_page)

        if ix_file == len(toc) - 1:
            url_next_page = ''
            next_file_title = ''
        else:
            next_file_title = toc[ix_file+1].get('title')
            url_next_page = toc[ix_file+1].get('url')
            url_next_page = _prepare_url(url_next_page)

        ###############################################################################
        # Get kernel name from notebooks metadata

        kernel_name = ''
        if path_url_page.endswith('.ipynb'):
            data = nbf.read(path_url_page, nbf.NO_CONVERT)
            kernel_name = data['metadata']['kernelspec']['name']

        ###############################################################################
        # Content conversion

        # Convert notebooks or just copy md if no notebook.
        if path_url_page.endswith('.ipynb'):
            # Create a temporary version of the notebook we can modify
            tmp_notebook = path_url_page + '_TMP'
            sh.copy2(path_url_page, tmp_notebook)

            # The temporary copy lives inside the content folder, so make sure
            # it is removed even when cleaning or conversion fails.
            try:
                ###############################################################################
                # Notebook cleaning

                # Clean up the file before converting
                cleaner = NotebookCleaner(tmp_notebook)
                cleaner.remove_cells(empty=True)
                if site_yaml.get('hide_cell_text', False):
                    cleaner.remove_cells(search_text=site_yaml.get('hide_cell_text'))
                if site_yaml.get('hide_code_text', False):
                    cleaner.clear(kind="content", search_text=site_yaml.get('hide_code_text'))
                cleaner.clear('stderr')
                cleaner.save(tmp_notebook)
                _clean_notebook_cells(tmp_notebook)

                ###############################################################################
                # Conversion to Jekyll Markdown

                # Run nbconvert moving it to the output folder
                # This is the output directory for `.md` files
                build_call = '--FilesWriter.build_directory={}'.format(path_new_folder)
                # Copy notebook output images to the build directory using the base folder name
                path_after_build_folder = path_new_folder.split(os.sep + BUILD_FOLDER_NAME + os.sep)[-1]
                nb_output_folder = op.join(PATH_IMAGES_FOLDER, path_after_build_folder)
                images_call = '--NbConvertApp.output_files_dir={}'.format(nb_output_folder)
                call = ['jupyter', 'nbconvert', '--log-level="CRITICAL"',
                        '--to', 'markdown', '--template', PATH_TEMPLATE,
                        images_call, build_call, tmp_notebook]
                if execute is True:
                    # Keep the notebook path as the last argument
                    call.insert(-1, '--execute')
                check_call(call)
            finally:
                os.remove(tmp_notebook)

        elif path_url_page.endswith('.md'):
            # If a non-notebook file, just copy it over.
            # If markdown we'll add frontmatter later
            sh.copy2(path_url_page, path_new_file)
        else:
            raise _error("Files must end in ipynb or md. Found file {}".format(path_url_page))

        ###############################################################################
        # Modify the generated Markdown to work with Jekyll

        # Clean markdown for Jekyll quirks (e.g. extra escape characters)
        with open(path_new_file, 'r') as ff:
            lines = ff.readlines()
        lines = _clean_lines(lines, path_new_file)

        # Split off original yaml
        yaml_orig, lines = _split_yaml(lines)

        # Front-matter YAML
        yaml_fm = []
        yaml_fm += ['---']
        # In case pre-existing links are sanitized
        sanitized = url_page.lower().replace('_', '-')
        if sanitized != url_page:
            # A redirect that differs from the page URL only by letter case
            # cannot coexist with the page on a case-insensitive filesystem
            if case_check and url_page.lower() == sanitized:
                raise RuntimeError(
                    'Redirect {} clashes with page {} for local build on '
                    'case-insensitive FS\n'.format(sanitized, url_page) +
                    'Rename source page to lower case or build on a case '
                    'sensitive FS, e.g. case-sensitive disk image on Mac')
            yaml_fm += ['redirect_from:']
            yaml_fm += [' - "{}"'.format(sanitized)]
        if path_url_page.endswith('.ipynb'):
            interact_path = CONTENT_FOLDER_NAME + '/' + path_url_page.split(CONTENT_FOLDER_NAME+'/')[-1]
            yaml_fm += ['interact_link: {}'.format(interact_path)]
            yaml_fm += ["kernel_name: {}".format(kernel_name)]
        yaml_fm += ["title: '{}'".format(title)]
        yaml_fm += ['prev_page:']
        yaml_fm += [' url: {}'.format(url_prev_page)]
        yaml_fm += [" title: '{}'".format(prev_file_title)]
        yaml_fm += ['next_page:']
        yaml_fm += [' url: {}'.format(url_next_page)]
        yaml_fm += [" title: '{}'".format(next_file_title)]

        # Add back any original YaML, and end markers
        yaml_fm += yaml_orig
        yaml_fm += ['comment: "***PROGRAMMATICALLY GENERATED, DO NOT EDIT. SEE ORIGINAL FILES IN /{}***"'.format(CONTENT_FOLDER_NAME)]
        yaml_fm += ['---']
        yaml_fm = [ii + '\n' for ii in yaml_fm]
        lines = yaml_fm + lines

        # Write the result
        with open(path_new_file, 'w') as ff:
            ff.writelines(lines)
        n_built_files += 1

    ###############################################################################
    # Finishing up...

    # Copy non-markdown files in notebooks/ in case they're referenced in the notebooks
    print('Copying non-content files inside `{}/`...'.format(CONTENT_FOLDER_NAME))
    _copy_non_content_files()

    # Message at the end
    print("\n===========")
    print("Generated {} new files\nSkipped {} already-built files".format(n_built_files, n_skipped_files))
    if n_built_files == 0:
        print("Delete the markdown files in '{}' for any pages that you wish to re-build, or use --overwrite option to re-build all.".format(BUILD_FOLDER_NAME))
    print("\nYour Jupyter Book is now in `{}/`.".format(BUILD_FOLDER_NAME))
    print("\nDemo your Jupyter book with `make serve` or push to GitHub!")
    print('===========\n')