From 657371acca235235bbb852014b594ad965cc19d9 Mon Sep 17 00:00:00 2001
From: sarahmccuan <35535105+sarahmccuan@users.noreply.github.com>
Date: Sat, 15 Jun 2024 19:57:31 -0500
Subject: [PATCH] add a dockerfile so this isn't so annoying to build from scratch (#19)

---
Review notes (free-text area, ignored by `git am`):
 * Dockerfile: the validate step uses shell-form RUN. In exec form the ">"
   and "test_results.txt" tokens are handed to scripts/validate.py as plain
   arguments, so nothing is redirected and no results file is written.
 * NOTE(review): this patch was recovered from a copy whose line breaks had
   been collapsed. Blank context lines and leading indentation inside the
   hunks were reconstructed from the hunk line counts; verify them against
   the repository before applying.

 Dockerfile           | 21 +++++++++++++++++++++
 build-html.py        |  2 +-
 requirements.txt     |  4 ++++
 scripts/normalize.py | 16 ++++++++--------
 scripts/validate.py  |  2 +-
 5 files changed, 35 insertions(+), 10 deletions(-)
 create mode 100644 Dockerfile
 create mode 100644 requirements.txt

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..9d0ca3a
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,21 @@
+FROM python:3.8-slim
+
+WORKDIR /usr/src/app
+
+COPY requirements.txt ./
+
+RUN pip install --no-cache-dir --upgrade pip \
+    && pip install --no-cache-dir -r requirements.txt
+
+COPY . .
+
+RUN ["python", "scripts/normalize.py"]
+
+# Shell form on purpose: exec-form RUN would pass ">" to the script as a literal argument.
+RUN python scripts/validate.py > test_results.txt
+
+RUN ["python", "build-html.py"]
+
+WORKDIR /usr/src/app/docs
+
+CMD ["python", "-m", "http.server"]
\ No newline at end of file
diff --git a/build-html.py b/build-html.py
index dd11d43..0b66511 100644
--- a/build-html.py
+++ b/build-html.py
@@ -84,5 +84,5 @@ def wrap_title(title: str, id = ''):
 print(body)
 
 html = '\n'.join([HEADER, body, FOOTER])
-with open('docs/index.html', 'w', encoding="utf-8") as f:
+with open('docs/greekwar.html', 'w', encoding="utf-8") as f:
     f.write(html)
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..2e4b7fd
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,4 @@
+text_validator
+pathlib
+greek-normalisation
+markdown
diff --git a/scripts/normalize.py b/scripts/normalize.py
index 6af028c..3f4ddcb 100644
--- a/scripts/normalize.py
+++ b/scripts/normalize.py
@@ -15,14 +15,14 @@ def normalize_file(filename):
         print('Error: ' + str(sys.exc_info()[0]))
 
 # relative path logic
-chambers_path = Path(__file__).parent.parent
-files_dir = (chambers_path / 'text\\').resolve()
-files_list = glob.glob(str(files_dir) + '*.txt')
+# chambers_path = Path(__file__).parent.parent
+# files_dir = (chambers_path / 'text\\').resolve()
+# files_list = glob.glob(str(files_dir) + '*.txt')
 
-# normalize_file("C:/Users/Sarah McCuan/Documents/projects/ChambersGreekWar/drafts/chambers_ocr.md")
-normalize_file("C:/Users/Sarah McCuan/Documents/projects/ChambersGreekWar/drafts/greek_english_vocab.md")
+# instead of using the list above and potentially messing up in-progress files,
+# it's easier to just individually specify what we want normalized:
 
-# for file in files_list:
-#     normalize_file(file)
+files_list = ['text/chambers_w_headers.txt', 'text/chambers.txt', 'docs/greek_english_vocab.html', 'docs/greekwar.html', 'docs/index.html']
 
-# print(validate((chambers_path / 'text-validator.toml').resolve(), files_list))
+for file in files_list:
+    normalize_file(file)
\ No newline at end of file
diff --git a/scripts/validate.py b/scripts/validate.py
index ec15f8f..a57cb18 100644
--- a/scripts/validate.py
+++ b/scripts/validate.py
@@ -1,3 +1,3 @@
 from text_validator.main import validate
 
-validate("C:/Users/Sarah McCuan/Documents/projects/ChambersGreekWar/text-validator.toml", ["C:/Users/Sarah McCuan/Documents/projects/ChambersGreekWar/text/01.txt"])
\ No newline at end of file
+validate("text-validator.toml", ["text/chambers_w_headers.txt"])
\ No newline at end of file