diff --git a/.flake8 b/.flake8 deleted file mode 100644 index d9ad0b40..00000000 --- a/.flake8 +++ /dev/null @@ -1,5 +0,0 @@ -[flake8] -ignore = E203, E266, E501, W503, F403, F401 -max-line-length = 79 -max-complexity = 18 -select = B,C,E,F,W,T4,B9 diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 00000000..39663c55 --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1,5 @@ +02babfb698a8dfbeb87f5be6ad21172eba82bc05 +ee480fbd24c2d0b1730f5ae4a6be6c6bc842eb94 +1862060ef717c05080c9b47497dc79328563b072 +3416098be96c2e8efee5c5ce1e935711575d2e47 +13435428e87005f168db210019759bf7578ec06f diff --git a/.github/workflows/pythonpublish.yml b/.github/workflows/pythonpublish.yml index 10759abf..f272dfa5 100644 --- a/.github/workflows/pythonpublish.yml +++ b/.github/workflows/pythonpublish.yml @@ -9,21 +9,21 @@ jobs: deploy: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up Python - uses: actions/setup-python@v1 + uses: actions/setup-python@v4 with: python-version: "3.x" - name: Install dependencies run: | python -m pip install --upgrade pip - pip install setuptools wheel twine + pip install build twine - name: Build and publish env: TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} run: | - python setup.py sdist bdist_wheel + python -m build --sdist --wheel twine upload dist/* - name: Determine tag id: determine_tag diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 00000000..4b4b61f7 --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,77 @@ +name: Run Tests +on: + - push + - pull_request +jobs: + test: + runs-on: ubuntu-latest + # #no-ci in the commit log flags commits we don't want CI-validated + if: ${{ !contains(github.event.head_commit.message, '#no-ci') }} + steps: + - uses: actions/checkout@v3 + + - uses: FedericoCarboni/setup-ffmpeg@v2 + id: setup-ffmpeg + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.7" + cache: "pip" + + - name: Install Python dependencies + run: | + # Keep pip up to date + python -m pip install --upgrade pip + # Some dependencies are built using wheel + pip install wheel + # Install all Python dependencies in just one pip call, including Studio itself + pip install -r requirements.txt \ + -r requirements.dev.txt \ + -r requirements.ci.txt \ + -e . + + - name: Run tests + run: | + gunicorn readalongs.app:app --bind 0.0.0.0:5000 --daemon + cd test && coverage run run.py prod && coverage xml + + - name: Nitpicking + run: | + # coding style: we want black compliance + find . 
-name \*.py | xargs black --check + # Legal check: make sure we don't have or introduce GPL dependencies + if pip-licenses | grep -v 'Artistic License' | grep -v LGPL | grep GNU; then echo 'Please avoid introducing *GPL dependencies'; false; fi + + - uses: codecov/codecov-action@v3 + with: + directory: ./test + token: ${{ secrets.CODECOV_TOKEN }} # optional but apparently makes upload more reliable + fail_ci_if_error: false # too many upload errors to keep "true" + + test-on-windows: + runs-on: windows-latest + if: ${{ !contains(github.event.head_commit.message, '#no-ci') }} + steps: + - uses: actions/checkout@v3 + + - uses: FedericoCarboni/setup-ffmpeg@v2 + id: setup-ffmpeg + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.7" + cache: "pip" + + - name: Install Python dependencies + run: | + python -m pip install --upgrade pip + pip install wheel + pip install -r requirements.txt ` + -r requirements.dev.txt ` + -r requirements.ci.txt ` + -e . + + - name: Run tests on Windows + run: cd test && python run.py prod diff --git a/.gitlint b/.gitlint new file mode 100644 index 00000000..f5e52106 --- /dev/null +++ b/.gitlint @@ -0,0 +1,9 @@ +[general] +# Enable conventional commit linting +contrib=contrib-title-conventional-commits + +# Ignore any data sent to gitlint via stdin (helpful on Windows) +ignore-stdin=true + +# We don't require a body, just a title, even though a body is also a good idea +ignore=body-is-missing diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 10697319..e91dcd6c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -12,6 +12,8 @@ repos: - id: flake8 - repo: local # Using local repos because these won't work for me from remote repo -EJ + # They're also more convenient because we install them via requirements.dev.txt + # and they are then available on the command line as well as in pre-commit. hooks: - id: isort name: isort @@ -25,7 +27,9 @@ repos: language: system types: [python] stages: [commit] -- repo: https://github.com/pre-commit/mirrors-mypy - rev: 'v0.782' # Use the sha / tag you want to point at - hooks: - id: mypy + name: mypy + entry: mypy + language: system + types: [python] + stages: [commit] diff --git a/.pylintrc b/.pylintrc index 22854677..4d44b940 100644 --- a/.pylintrc +++ b/.pylintrc @@ -1,7 +1,14 @@ [MASTER] # A lot of test cases depend on etree, let's allow pylint to load it extension-pkg-allow-list=lxml.etree -# We use isort for sorting our imports, so nevermind what pylint thinks -disable=wrong-import-order + +disable= + # We use isort for sorting our imports, so nevermind what pylint thinks + wrong-import-order, + # I find the "unnecessary" else makes code more readable + no-else-return, + # We use single letter e for exception, f for file handles + invalid-name + # Add . to the PYTHONPATH so pylint knows test cases can import basic_test_case init-hook="import sys; sys.path.append('.')" diff --git a/.readthedocs.yml b/.readthedocs.yml index c926addc..ecbaa4bf 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -1,13 +1,18 @@ version: 2 build: - os: ubuntu-20.04 - tools: - python: "3.7" + os: ubuntu-20.04 + tools: + python: "3.7" + jobs: + post_install: + - echo "Installing Studio itself in its current state" + - which pip python + - pip install -e . 
sphinx: - configuration: docs/conf.py + configuration: docs/conf.py python: - install: - - requirements: docs/requirements.txt + install: + - requirements: docs/requirements.txt diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 00000000..91da82e6 --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,46 @@ +cff-version: 1.2.0 +message: >- + If you use this software in a project of yours and write about it, please + cite our SIGUL 2022 paper using the following citation data. +title: ReadAlongs Studio +url: https://github.com/ReadAlongs/Studio +preferred-citation: + type: conference-paper + title: >- + ReadAlong Studio: Practical Zero-Shot Text-Speech Alignment for Indigenous + Language Audiobooks + authors: + - given-names: Patrick + family-names: Littell + email: Patrick.Littell@nrc-cnrc.gc.ca + affiliation: National Research Council Canada + - given-names: Eric + family-names: Joanis + email: Eric.Joanis@nrc-cnrc.gc.ca + affiliation: National Research Council Canada + - given-names: Aidan + family-names: Pine + email: Aidan.Pine@nrc-cnrc.gc.ca + affiliation: National Research Council Canada + - given-names: Marc + family-names: Tessier + email: Marc.Tessier@nrc-cnrc.gc.ca + affiliation: National Research Council Canada + - given-names: David + family-names: Huggins-Daines + email: dhdaines@gmail.com + affiliation: Independent Researcher + - given-names: Delasie + family-names: Torkornoo + email: delasie.torkornoo@carleton.ca + affiliation: Carleton University + collection-title: Proceedings of SIGUL2022 @LREC2022 + start: 23 + end: 32 + year: 2022 + month: 6 + publisher: + name: European Language Resources Association (ELRA) + location: + name: Marseille + url: http://www.lrec-conf.org/proceedings/lrec2022/workshops/SIGUL/pdf/2022.sigul-1.4.pdf diff --git a/Contributing.md b/Contributing.md index bb2fde66..7013855b 100644 --- a/Contributing.md +++ b/Contributing.md @@ -16,15 +16,16 @@ commits. Run these commands in each of your sandboxes to enable our pre-commit hooks and commitlint: ```sh +pip install -r requirements.dev.txt pre-commit install -npm install +gitlint install-hook ``` ## Pre-commit hooks The ReadAlong Studio team has agreed to systematically use a number of pre-commit hooks to normalize formatting of code. You need to install and enable pre-commit to have these used -when you do your own commits. +automatically when you do your own commits. Pre-commit hooks enabled: - check-yaml validates YAML files @@ -60,11 +61,11 @@ don't forget to do so when you clone a new sandbox! ## commitlint The team has also agreed to use commitlint-style commit messages. Install and enable -[commitlint](https://github.com/conventional-changelog/commitlint) to have your commits -validated systematically. +The team has also agreed to use [Conventional Commits](https://www.conventionalcommits.org/). +Install and enable [gitlint](https://jorisroovers.com/gitlint/) to have your +commit messages scanned automatically. -Commitlint commits look like this: +Conventional commits look like this: type(optional-scope): subject (i.e., short description) @@ -107,32 +108,14 @@ These rules are inspired by these commit formatting guides: ### Enabling commitlint -We run commitlint on each commit message that you write by enabling the commit-msg hook in -Git. It is run via [husky](https://www.npmjs.com/package/husky), which is a JS Git hook -manager, and you need Node to run it. 
- -If you don't already use Node, this is a bit more work to install that the pre-commit -hooks above, but please take a moment to do this: +You can run commitlint on each commit message that you write by enabling the +commit-msg hook in Git. -- If you don't already use Node or nvm, or if you don't have admin access to the system - version of node, install nvm in your ~/.nvm folder: -```sh -wget -qO- https://raw.githubusercontent.com/nvm-sh/nvm/v0.35.3/install.sh | bash -``` -This will add a few lines to your `.bashrc` file, which you'll need to execute now, -possibly by starting a new shell. - -- Install Node: -```sh -nvm install node -``` +Run these commands in your ReadAlongs/Studio sandbox to install and enable the commit-msg hook: -- In your ReadAlong/Studio sandbox, install the husky commit-msg hook using npm, the node - package manager you just installed using nvm. The file `package.json` in Studio is what - tells npm to install husky as a pre-commit hook, and also what tells husky to invoke - commitlint on your commit messages. ```sh -npm install +pip install -r requirements.dev.txt +gitlint install-hook ``` - Now, next time you make a change and commit it, your commit log will be checked: diff --git a/Dockerfile b/Dockerfile index b850a467..8b97eb14 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,40 +3,46 @@ FROM debian:bullseye-slim ENV APPHOME /opt/readalong-studio ENV PORT 5000 -# Install system dependencies -# - swig: required by pocketsphinx -# - libpulse-dev: required by pocketsphinx -# - portaudio19-dev: required by pocketsphinx RUN apt-get update && apt-get install -y \ +# Lean, optimized installation of system dependencies +RUN apt-get update \ + && DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends --yes \ python3 \ python3-pip \ git \ - swig \ - libpulse-dev \ - portaudio19-dev \ ffmpeg \ - vim-nox + vim-nox \ + less \ + && apt-get clean \ + && apt-get autoremove \ + && rm -fr /var/lib/apt/lists/* # Install 3rd party dependencies in their own layer, for faster rebuilds when we # change ReadAlong-Studio source code -RUN python3 -m pip install gevent -ADD requirements.txt $APPHOME/requirements.txt -RUN python3 -m pip install -r $APPHOME/requirements.txt -# RUN python3 -m pip install gunicorn # If you want to run production server +ADD requirements.* $APPHOME/ +RUN python3 -m pip install --upgrade pip \ + && python3 -m pip install -r $APPHOME/requirements.txt \ + && python3 -m pip install gevent # We don't want Docker to cache the installation of g2p or Studio, so place them # after COPY . $APPHOME, which almost invariably invalidates the cache. COPY . $APPHOME WORKDIR $APPHOME # Get and install the latest g2p -RUN git clone https://github.com/roedoejet/g2p.git -RUN cd g2p && python3 -m pip install -e . +RUN git clone https://github.com/roedoejet/g2p.git \ + && cd g2p \ + && python3 -m pip install -e . + # Install ReadAlong-Studio itself RUN python3 -m pip install -e . 
-# Run the default gui (on localhost:5000) +# Run the default gui (on localhost:5000, make sure you use -p 5000:5000 when +# you docker run the image) CMD python3 ./run.py # For a production server, comment out the default gui CMD above, and run the # gui using gunicorn instead: -# CMD gunicorn -k gevent -w 1 readalongs.app:app --bind 0.0.0.0:5000 +# CMD gunicorn -k gevent -w 1 readalongs.app:app --bind 0.0.0.0:$PORT + +# For the web API, use this CMD instead; it is the same as on our Heroku deployment, +# except that it binds to port 5000 +# CMD gunicorn -w 4 -k uvicorn.workers.UvicornWorker readalongs.web_api:web_api_app --bind 0.0.0.0:$PORT diff --git a/LICENSE b/LICENSE index bee9b87b..162daf13 100644 --- a/LICENSE +++ b/LICENSE @@ -2,6 +2,7 @@ MIT License Copyright (c) 2019 David Huggins-Daines Copyright (c) 2019-2021 National Research Council Canada +Acoustic model in readalongs/static/model/cmusphinx-en-us-5.2 Copyright (c) 2015 Alpha Cephei Inc. licensed under the FreeBSD License; see README in that directory. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/Procfile b/Procfile new file mode 100644 index 00000000..f7b08354 --- /dev/null +++ b/Procfile @@ -0,0 +1,2 @@ +# Command for launching the web API server for ReadAlongs-Studio on Heroku +web: gunicorn -w 4 -k uvicorn.workers.UvicornWorker readalongs.web_api:web_api_app diff --git a/README.md b/README.md index e240bb3f..173ee2c3 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # ReadAlong-Studio [![codecov](https://codecov.io/gh/ReadAlongs/Studio/branch/master/graph/badge.svg)](https://codecov.io/gh/ReadAlongs/Studio) -[![Build Status](https://travis-ci.com/ReadAlongs/Studio.svg?branch=master)](https://travis-ci.com/github/ReadAlongs/Studio) +[![Build Status](https://github.com/readalongs/Studio/actions/workflows/tests.yml/badge.svg?branch=master)](https://github.com/ReadAlongs/Studio/actions) [![PyPI package](https://img.shields.io/pypi/v/readalongs.svg)](https://pypi.org/project/readalongs/) [![GitHub license](https://img.shields.io/github/license/ReadAlongs/Studio)](https://github.com/ReadAlongs/Studio/blob/master/LICENSE) [![standard-readme compliant](https://img.shields.io/badge/readme%20style-standard-brightgreen.svg?style=flat-square)](https://github.com/ReadAlongs/Studio) @@ -22,7 +22,8 @@ This library is an end-to-end audio/text aligner. It is meant to be used togethe - [Validation](#Verifying-your-installation) - [Usage](#usage) - [CLI](#cli) - - [Studio](#Studio-web-application) + - [Web API](#web-api) + - [Studio](#studio-web-application) - [Docker](#docker) - [Maintainers](#maintainers) - [Contributing](#contributing) @@ -154,9 +155,25 @@ Basic alignment is done with the following command. `readalongs align TEXTFILE WAVFILE OUTPUTNAME` +### Web API + +This section lists only the most basic endpoints. + +For more information about how the web API works, consult the interactive [API Documentation](https://readalong-studio.herokuapp.com/api/v1/docs). + +For information on spinning up your own dev Web API server locally, have a look at [web\_api.py](readalongs/web_api.py). 
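+Here is a minimal Python sketch of how a client might call the two endpoints
+described below. It is illustrative only: the payload field names used for
+`/assemble` are assumptions, so consult the interactive API Documentation
+linked above for the authoritative request and response schemas.
+
+```python
+import requests
+
+API_BASE = "https://readalong-studio.herokuapp.com/api/v1"
+
+# List the languages supported by the aligner (GET /langs)
+response = requests.get(f"{API_BASE}/langs", timeout=10)
+response.raise_for_status()
+print(response.json())
+
+# Ask the API to assemble alignment data for a plain-text story (POST /assemble).
+# "text" and "text_languages" are assumed field names for this sketch.
+payload = {"text": "Bonjour le monde", "text_languages": ["fra", "und"]}
+response = requests.post(f"{API_BASE}/assemble", json=payload, timeout=30)
+response.raise_for_status()
+print(sorted(response.json().keys()))
+```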
+ +#### /langs + +To query a list of available languages in the ReadAlong Studio API, send a GET request to https://readalong-studio.herokuapp.com/api/v1/langs + +#### /assemble + +This endpoint is a remote procedure call that assembles the data needed to build a readalong using the JavaScript-based [SoundSwallower decoder](https://github.com/ReadAlongs/SoundSwallower). It accepts POST requests with either plaintext or XML input. Please see the [documentation](https://readalong-studio.herokuapp.com/api/v1/docs) for more information. + ### Studio web application -ReadAlong-Studio has a web interface for creating interactive audiobooks. The web app can be served by first installing ReadAlong-Studio and then running `readalongs run`. A web app will then be available on port 5000. +ReadAlong-Studio has a web interface for creating interactive audiobooks. The web app can be served by first installing ReadAlong-Studio and then running `python3 run.py`. A web app will then be available on port 5000. ### Docker @@ -202,7 +219,17 @@ Feel free to dive in! [Open an issue](https://github.com/ReadAlongs/Studio/issue This repo follows the [Contributor Covenant](http://contributor-covenant.org/version/1/3/0/) Code of Conduct. -Have a look at [Contributing.md](Contributing.md) for help getting started. +You can install our standard Git hooks by running these commands in your sandbox: + +```sh +pip install -r requirements.dev.txt +pre-commit install +gitlint install-hook +``` + +Have a look at [Contributing.md](Contributing.md) for the full details on the +Conventional Commit messages we prefer, our code formatting conventions, and +our Git hooks. ### Contributors @@ -218,6 +245,24 @@ Here is a partial list: Project web page: [ReadAlong Studio: Application for Indigenous audiobooks and videos project](https://nrc.canada.ca/en/research-development/research-collaboration/programs/readalong-studio-application-indigenous-audiobooks-videos-project) +### Citation + +If you use this software in a project of yours and write about it, please cite +us using the following: + +``` +@inproceedings{Littell_ReadAlong_Studio_Practical_2022, + author = {Littell, Patrick and Joanis, Eric and Pine, Aidan and Tessier, Marc and Huggins-Daines, David and Torkornoo, Delasie}, + booktitle = {Proceedings of SIGUL2022 @LREC2022}, + title = {{ReadAlong Studio: Practical Zero-Shot Text-Speech Alignment for Indigenous Language Audiobooks}}, + year = {2022}, + month = {6}, + pages = {23--32}, + publisher = {European Language Resources Association (ELRA)}, + url = {http://www.lrec-conf.org/proceedings/lrec2022/workshops/SIGUL/pdf/2022.sigul-1.4.pdf} +} +``` + ## License -[MIT](LICENSE) © 2019-2021 David Huggins-Daines and National Research Council Canada +[MIT](LICENSE) © 2019-2022 David Huggins-Daines and National Research Council Canada diff --git a/docs/README.md b/docs/README.md index 873aa2c4..75d6e6e9 100644 --- a/docs/README.md +++ b/docs/README.md @@ -5,28 +5,35 @@ To contribute to the ReadAlongs Studio documentation, edit the `.rst` files in this folder. -## Build the documentation locally +## Build and view the documentation locally -To build the documention for local inspection, run one of these commands, -which will build the documentation in `./_build/html/` or -`./_build/singlehtml/`: +To build the documentation and review your own changes locally: - make html # multi-page HTML site - make singlehtml # single-page HTML document +1. 
Install the required build software, Sphinx: -## View the documentation locally + pip install -r requirements.txt -To view the documentation, run an HTTP server in the directory where the build -is found, e.g., +2. Install Studio itself - cd _build/html - python3 -m http.server + (cd .. && pip install -e .) -and navigate to http://127.0.0.1:8000 to view the results (or whatever port -your local web server displays). +3. Run one of these commands, which will build the documentation in `./_build/html/` + or `./_build/singlehtml/`: + + make html # multi-page HTML site + make singlehtml # single-page HTML document + +4. View the documentation by running an HTTP server in the directory where the + build is found, e.g., + + cd _build/html + python3 -m http.server + + and navigating to http://127.0.0.1:8000 (or whatever port your local web + server displays). ## Publish the changes Once your changes are pushed to GitHub and merged into `master` via a Pull -Request, the documentation will be automatically built and published to +Request, the documentation will automatically get built and published to https://readalong-studio.readthedocs.io/en/latest/ diff --git a/docs/advanced-use.rst b/docs/advanced-use.rst index e60450c0..2f6e3e23 100644 --- a/docs/advanced-use.rst +++ b/docs/advanced-use.rst @@ -1,7 +1,24 @@ .. _advanced-use: -Data pre-processing and troubleshooting -======================================= +Advanced topics +=============== + +.. _adding-a-lang: + +Adding a new language to g2p +---------------------------- + +If you want to align an audio book in a language that is not yet supported by +the g2p library, you will have to write your own g2p mapping for that language. + +References: + - The `g2p library <https://github.com/roedoejet/g2p>`__ and its + `documentation `__. + - The `7-part blog post on creating g2p mappings `__ on the `Mother Tongues Blog `__. + +Once you have created a g2p mapping for your language, please consider +`contributing it to the project `__ +so others can also benefit from your work! Pre-processing your data ------------------------ @@ -77,86 +94,3 @@ pre-processing. num2words 123456789 one hundred and twenty-three million, four hundred and fifty-six thousand, seven hundred and eighty-nine - -Troubleshooting --------------- - -Here are three types of common errors you may encounter when trying to -run ReadAlongs, and ways to debug them. - -Phones missing in the acoustic model -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -You may get an error that looks like this:|image1| - -The general structure of your error would look like -``Phone [character] is missing in the acoustic model; word [index] ignored`` -This error is most likely caused not by a bug in your ReadAlong input -files, but by an error in one of your g2p mappings. The error message is -saying that there is a character in your ReadAlong text that is not -being properly converted to English-arpabet (eng-arpabet), which is the -language ReadAlong uses to map text to sound. Thus, ReadAlong cannot -match your text to a corresponding sound (phone) in your audio file -because it cannot understand what sound the text is meant to represent. -Follow these steps to debug the issue **in g2p**. - -1. Identify which characters in each line of the error message are - **not** being converted to eng-arpabet. These will either be: - - a. characters that are not in caps (for example ``g`` in the string - ``gUW`` in the error message shown above.) - b. 
a character not traditionally used in English (for example é or Ŧ, - or ``ʰ`` in the error message shown above.) You can confirm you - have isolated the right characters by ensuring every other - character in your error message appears as an **output** in the - `eng-ipa-to-arpabet - mapping `__. - These are the problematic characters we need to debug in the error - message shown above: ``g`` and ``ʰ``. - -2. Once you have isolated the characters that are not being converted to - eng-arpabet, you are ready to begin debugging the issue. Start at - step 3 below for each problematic character. - -3. Our next step is to identify which mapping is converting the - problematic characters incorrectly. Most of the time, the issue will - be in either the first or the second of the following mappings: - - i. *xyz-ipa* (where xyz is the ISO language code for your mapping) - ii. *xyz-equiv* (if you have one) - iii. *xyz-ipa_to_eng-ipa* (this mapping must be generated - automatically in g2p. Refer //here_in_the_guide to see how to do - this.) - iv. `eng-ipa-to-arpabet - mapping `__ - (The issue is rarely found here, but it doesn’t hurt to check.) - -4. Find a word in your text that uses the problematic character. For the - sake of example, let us assume the character I am debugging is ``g``, - that appears in the word "dog", in language "xyz". - -5. Make sure you are in the g2p repository and run the word through - ``g2p convert`` to confirm you have isolated the correct characters - to debug: ``g2p convert dog xyz eng-arpabet``. Best practice is to - copy+paste the word directly from your text instead of retyping it. - Make sure to use the ISO code for your language in place of "xyz". - *If the word converts cleanly into eng-arpabet characters, your issue - does not lie in your mapping. //Refer to other potential RA issues* - -6. From the result of the command run in 5, note the characters that do - **not** appear as **inputs** in the `eng-ipa-to-arpabet - mapping `__. - These are the characters that have not been converted into characters - that eng-ipa-to-arpabet can read. These should be the same characters - you identified in step 2. - -7. Run ``g2p convert dog xyz xyz-ipa``. Ensure the result is what you - expect. If not, your error may arise from a problem in this mapping. - refer_to_g2p_troubleshooting. If the result is what you expect, - continue to the next step. - -8. Note the result from running the command in 7. Check that the - characters [TODO-fix this text] (appear/being mapped by generated -- - use debugger or just look at mapping) - -.. |image1| image:: https://i.imgur.com/vKPhTud.png diff --git a/docs/cli-guide.rst b/docs/cli-guide.rst index 6477aa37..956b6e12 100644 --- a/docs/cli-guide.rst +++ b/docs/cli-guide.rst @@ -6,10 +6,10 @@ Command line interface (CLI) user guide This page contains guidelines on using the ReadAlongs CLI. See also :ref:`cli-ref` for the full CLI reference. -The ReadAlongs CLI has two main commands: ``readalongs prepare`` and +The ReadAlongs CLI has two main commands: ``readalongs make-xml`` and ``readalongs align``. -- If your data is a plain text file, you can run ``prepare`` to turn it into +- If your data is a plain text file, you can run ``make-xml`` to turn it into XML, which you can then align with ``align``. 
Doing this in two steps allows you to modify the XML file before aligning it (e.g., to mark that some text is in a different language, to flag some do-not-align text, or to drop anchors @@ -22,7 +22,7 @@ The ReadAlongs CLI has two main commands: ``readalongs prepare`` and Two additional commands are sometimes useful: ``readalongs tokenize`` and ``readalongs g2p``. -- ``tokenize`` takes the output of ``prepare`` and tokenizes it, wrapping each +- ``tokenize`` takes the output of ``make-xml`` and tokenizes it, wrapping each word in the text in a ``<w>`` element. - ``g2p`` takes the output of ``tokenize`` and maps each word to its @@ -33,12 +33,12 @@ Two additional commands are sometimes useful: ``readalongs tokenize`` and The result of ``tokenize`` or ``g2p`` can be fixed manually if necessary and then used as input to ``align``. -Getting from TXT to XML with readalongs prepare +Getting from TXT to XML with readalongs make-xml ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Run :ref:`cli-prepare` to prepare an XML file for ``align`` from a TXT file. +Run :ref:`cli-make-xml` to make the XML file for ``align`` from a TXT file. -``readalongs prepare [options] [story.txt] [story.xml]`` +``readalongs make-xml [options] [story.txt] [story.xml]`` ``[story.txt]``: path to the plain text input file (TXT) @@ -60,19 +60,19 @@ breaks are marked by two blank lines. | | and will be aligning repeatedly) | +-----------------------------------+-----------------------------------------------+ | ``-h, --help`` | Displays CLI guide for | -| | ``prepare`` | +| | ``make-xml`` | +-----------------------------------+-----------------------------------------------+ The ``-l, --language`` argument requires a language’s 3 character `ISO code `__ as an argument. -The languages supported by RAS can be listed by running ``readalongs prepare -h`` -and they can also be found in the :ref:`cli-prepare` reference. +The languages supported by RAS can be listed by running ``readalongs make-xml -h`` +and they can also be found in the :ref:`cli-make-xml` reference. So, a full command for a story in Algonquin, with an implicit g2p fallback to Undetermined, would be something like: -``readalongs prepare -l alq Studio/story.txt Studio/story.xml`` +``readalongs make-xml -l alq Studio/story.txt Studio/story.xml`` The generated XML will be parsed into sentences. At this stage you can edit the XML to make any modifications, such as adding ``do-not-align`` as an attribute of any element in the xml (word, sentence, paragraph, or page), e.g., ``<w do-not-align="true">dog</w>``. -If you have already run ``readalongs prepare``, there will be +If you have already run ``readalongs make-xml``, there will be documentation for DNA text in comments at the beginning of the generated xml file. @@ -155,7 +155,7 @@ created, as ``output_base*`` | | configuration file (in JSON | | | format) | +-----------------------------------+-----------------------------------------------+ -| ``--g2p-verbose`` | Display verbose g2p error messages | +| ``--debug-g2p`` | Display verbose g2p debugging messages | +-----------------------------------+-----------------------------------------------+ | ``-s, --save-temps`` | Save intermediate stages of | | | processing and temporary files | @@ -184,13 +184,35 @@ A full command could be something like: - With other extensions, the beginning of the file is examined to automatically determine if it's XML or plain text. 
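+The exact detection logic lives inside ``readalongs align``, but an
+illustrative sketch of this kind of XML-vs-text sniffing (an assumption,
+not the actual implementation) could look like this:
+
+.. code-block:: python
+
+    def looks_like_xml(path: str, sniff_bytes: int = 64) -> bool:
+        """Guess whether a file is XML from its first non-blank byte."""
+        with open(path, "rb") as f:
+            head = f.read(sniff_bytes)
+        if head.startswith(b"\xef\xbb\xbf"):  # skip a UTF-8 BOM if present
+            head = head[3:]
+        return head.lstrip().startswith(b"<")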
-The config.json file -~~~~~~~~~~~~~~~~~~~~ +Supported languages +~~~~~~~~~~~~~~~~~~~ -Some additional parameters can be specified via a config file: create a JSON -file called ``config.json``, possibly in the same folder as your other ReadAlong -input files for convenience. The config file currently accepts two components: -adding images to your ReadAlongs, and DNA audio (see :ref:`dna`). +The ``readalongs langs`` command can be used to list all supported languages. + +Here is that list at the time of compiling this documentation: + +.. command-output:: readalongs langs + +See :ref:`adding-a-lang` for references on adding new languages to that list. + + +Adding titles, images and do-not-align segments via the config.json file +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Some additional parameters can be specified via a config file: create +a JSON file called ``config.json``, possibly in the same folder as +your other ReadAlong input files for convenience. The config file +currently accepts a few components: adding titles and headers, adding +images to your ReadAlongs, and DNA audio (see :ref:`dna`). + +To add a title and headers to the output HTML, you can use the keys +`"title"`, `"header"`, and `"subheader"`, for example:: + + { + "title": "My awesome read-along", + "header": "A story in my language", + "subheader": "Read by me" + } To add images, indicate the page number as the key, and the name of the image file as the value, as an entry in the ``"images"`` dictionary. @@ -279,7 +301,7 @@ falling back to ``eng`` and then ``und`` (see below) when needed. .. code-block:: bash - readalongs prepare -l fra,eng myfile.txt myfile.xml + readalongs make-xml -l fra,eng myfile.txt myfile.xml readalongs align -l fra,eng myfile.txt myfile.wav output-dir The "Undetermined" language code: und @@ -296,7 +318,7 @@ most text with a few foreign words without any manual intervention. Since we recommend systematically using ``und`` at the end of the cascade, it is now added by default after the languages specified with the ``-l`` -switch to both ``readalongs align`` and ``readalongs prepare``. Note that +switch to both ``readalongs align`` and ``readalongs make-xml``. Note that adding other languages after ``und`` will have no effect, since the Undetermined mapping will map any string to valid ARPABET. @@ -311,7 +333,7 @@ The warning messages issued by ``readalongs g2p`` and ``readalongs align`` indicate which words are causing g2p problems and what fallbacks were tried. It can be worth inspecting the input text to fix any encoding or spelling errors highlighted by these warnings. More detailed messages can be -produced by adding the ``--g2p-verbose`` switch, to obtain a lot more +produced by adding the ``--debug-g2p`` switch, to obtain a lot more information about g2p'ing words in each language in which g2p was unsuccessfully attempted. @@ -325,7 +347,7 @@ The following series of commands: :: - readalongs prepare -l l1,l2 file.txt file.xml + readalongs make-xml -l l1,l2 file.txt file.xml readalongs tokenize file.xml file.tokenized.xml readalongs g2p file.tokenized.xml file.g2p.xml readalongs align file.g2p.xml file.wav output @@ -354,7 +376,7 @@ Anchor syntax ^^^^^^^^^^^^^ Anchors are inserted in the XML file (the output of -``readalongs prepare``, ``readalongs tokenize`` or ``readalongs g2p``) +``readalongs make-xml``, ``readalongs tokenize`` or ``readalongs g2p``) using the following syntax: ``<anchor time="2.5s"/>`` or ``<anchor time="2500ms"/>``. The time can be specified in seconds (this is the default) or milliseconds. 
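+If a readalong needs many anchors, it can be convenient to insert them
+programmatically. The helper below is a hypothetical example using lxml
+(the file names are placeholders); only the ``anchor`` element and its
+``time`` attribute come from the syntax described above:
+
+.. code-block:: python
+
+    from lxml import etree
+
+    def add_anchor_before(word_element, time_str: str) -> None:
+        """Insert an <anchor time="..."/> element just before a word element."""
+        anchor = etree.Element("anchor", time=time_str)
+        word_element.addprevious(anchor)
+
+    tree = etree.parse("story.tokenized.xml")
+    words = tree.findall(".//w")
+    add_anchor_before(words[10], "2.5s")  # time found by listening to the audio
+    tree.write("story.anchored.xml", encoding="utf-8", xml_declaration=True)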
diff --git a/docs/cli-ref.rst b/docs/cli-ref.rst index cb89f728..34afa0df 100644 --- a/docs/cli-ref.rst +++ b/docs/cli-ref.rst @@ -6,13 +6,14 @@ Command line interface (CLI) reference This page contains the full reference documentation for each command in the CLI. See also :ref:`cli-guide` for guidelines on using the CLI. -The ReadAlongs CLI has four key commands: +The ReadAlongs CLI has five key commands: - :ref:`cli-align`: full alignment pipeline, from plain text or XML to a viewable readalong -- :ref:`cli-prepare`: convert a plain text file into XML, for align -- :ref:`cli-tokenize`: tokenize a prepared XML file +- :ref:`cli-make-xml`: convert a plain text file into XML, for align +- :ref:`cli-tokenize`: tokenize an XML file - :ref:`cli-g2p`: g2p a tokenized XML file +- :ref:`cli-langs`: list supported languages Each command can be run with ``-h`` or ``--help`` to display its usage manual, e.g., ``readalongs -h``, ``readalongs align --help``. @@ -21,9 +22,9 @@ e.g., ``readalongs -h``, ``readalongs align --help``. .. click:: readalongs.cli:align :prog: readalongs align -.. _cli-prepare: -.. click:: readalongs.cli:prepare - :prog: readalongs prepare +.. _cli-make-xml: +.. click:: readalongs.cli:make_xml + :prog: readalongs make-xml .. _cli-tokenize: .. click:: readalongs.cli:tokenize :prog: readalongs tokenize @@ -32,3 +33,7 @@ e.g., ``readalongs -h``, ``readalongs align --help``. .. _cli-g2p: .. click:: readalongs.cli:g2p :prog: readalongs g2p + +.. _cli-langs: +.. click:: readalongs.cli:langs + :prog: readalongs langs diff --git a/docs/conf.py b/docs/conf.py index 3a58f054..af4cd1ef 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -21,7 +21,7 @@ # -- Project information ----------------------------------------------------- project = "ReadAlongs-Studio" -copyright = "2019-2021 David Huggins-Daines and National Research Council Canada" +copyright = "2019-2022 David Huggins-Daines and National Research Council Canada" author = "David Huggins-Daines, Eric Joanis, Patrick Littell, Aidan Pine" # The short X.Y version @@ -45,6 +45,7 @@ "sphinx.ext.todo", "sphinx.ext.coverage", "sphinx_click.ext", + "sphinxcontrib.programoutput", ] # Add any paths that contain templates here, relative to this directory. diff --git a/docs/index.rst b/docs/index.rst index 3abf9e44..a99a1d92 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,7 +1,9 @@ -Welcome to ReadAlong-Studio's documentation! -============================================ +Welcome to ReadAlong-Studio's documentation +=========================================== -.. note:: ReadAlong-Studio is UNDER CONSTRUCTION and should not be expected to be fully documented or even work as expected! Check back soon for more information. +Audiobook alignment for Indigenous languages + +This site provides the full user documentation for ReadAlongs-Studio. .. toctree:: :maxdepth: 2 @@ -13,6 +15,7 @@ Welcome to ReadAlong-Studio's documentation! cli-ref outputs advanced-use + troubleshooting Indices and tables diff --git a/docs/outputs.rst b/docs/outputs.rst index 0d236f7e..e3098d9f 100644 --- a/docs/outputs.rst +++ b/docs/outputs.rst @@ -35,8 +35,8 @@ Below is an example of a minimal implementation in a basic standalone html page. 
- - + + diff --git a/docs/requirements.txt b/docs/requirements.txt index 8109c985..a85ae383 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,4 +1,5 @@ Sphinx guzzle_sphinx_theme sphinx-click +sphinxcontrib-programoutput -r ../requirements.txt diff --git a/docs/troubleshooting.rst b/docs/troubleshooting.rst new file mode 100644 index 00000000..6f395fb3 --- /dev/null +++ b/docs/troubleshooting.rst @@ -0,0 +1,102 @@ +.. _troubleshooting: + +.. note:: This troubleshooting guide is under construction. + +Troubleshooting +=============== + +Here are three types of common errors you may encounter when trying to +run ReadAlongs, and ways to debug them. + +Phones missing in the acoustic model +------------------------------------ + +.. note:: Troubleshooting item under construction + +You may get an error that looks like this: |image1| + +The general structure of your error would look like +``Phone [character] is missing in the acoustic model; word [index] ignored`` +This error is most likely caused not by a bug in your ReadAlong input +files, but by an error in one of your g2p mappings. The error message is +saying that there is a character in your ReadAlong text that is not +being properly converted to English-arpabet (eng-arpabet), which is the +language ReadAlong uses to map text to sound. Thus, ReadAlong cannot +match your text to a corresponding sound (phone) in your audio file +because it cannot understand what sound the text is meant to represent. +Follow these steps to debug the issue **in g2p**. + +1. Identify which characters in each line of the error message are + **not** being converted to eng-arpabet. These will either be: + + a. characters that are not in caps (for example ``g`` in the string + ``gUW`` in the error message shown above.) + b. a character not traditionally used in English (for example é or Ŧ, + or ``ʰ`` in the error message shown above.) You can confirm you + have isolated the right characters by ensuring every other + character in your error message appears as an **output** in the + `eng-ipa-to-arpabet + mapping `__. + These are the problematic characters we need to debug in the error + message shown above: ``g`` and ``ʰ``. + +2. Once you have isolated the characters that are not being converted to + eng-arpabet, you are ready to begin debugging the issue. Start at + step 3 below for each problematic character. + +3. Our next step is to identify which mapping is converting the + problematic characters incorrectly. Most of the time, the issue will + be in either the first or the second of the following mappings: + + i. *xyz-ipa* (where xyz is the ISO language code for your mapping) + ii. *xyz-equiv* (if you have one) + iii. *xyz-ipa_to_eng-ipa* (this mapping must be generated + automatically in g2p. Refer //here_in_the_guide to see how to do + this.) + iv. `eng-ipa-to-arpabet + mapping `__ + (The issue is rarely found here, but it doesn’t hurt to check.) + +4. Find a word in your text that uses the problematic character. For the + sake of example, let us assume the character I am debugging is ``g``, + that appears in the word "dog", in language "xyz". + +5. Make sure you are in the g2p repository and run the word through + ``g2p convert`` to confirm you have isolated the correct characters + to debug: ``g2p convert dog xyz eng-arpabet``. Best practice is to + copy+paste the word directly from your text instead of retyping it. + Make sure to use the ISO code for your language in place of "xyz". 
+ *If the word converts cleanly into eng-arpabet characters, your issue + does not lie in your mapping. //Refer to other potential RA issues* + +6. From the result of the command run in 5, note the characters that do + **not** appear as **inputs** in the `eng-ipa-to-arpabet + mapping `__. + These are the characters that have not been converted into characters + that eng-ipa-to-arpabet can read. These should be the same characters + you identified in step 2. + +7. Run ``g2p convert dog xyz xyz-ipa``. Ensure the result is what you + expect. If not, your error may arise from a problem in this mapping. + refer_to_g2p_troubleshooting. If the result is what you expect, + continue to the next step. + +8. Note the result from running the command in 7. Check that the + characters [TODO-fix this text] (appear/being mapped by generated -- + use debugger or just look at mapping) + +.. |image1| image:: https://i.imgur.com/vKPhTud.png + +Type 2 +------ + +.. note:: TODO + +Common error type 2... + +Type 3 +------ + +.. note:: TODO + +Common error type 3... diff --git a/misc-utils/README.md b/misc-utils/README.md index ae01f8f9..27c01792 100644 --- a/misc-utils/README.md +++ b/misc-utils/README.md @@ -19,7 +19,7 @@ categories under "Sonority Hierarchy" to support other languages. Must be called manually after readalongs tokenize and before readalongs align or readalongs g2p: - readalongs prepare -l my_lang file.txt file.xml + readalongs make-xml -l my_lang file.txt file.xml readalongs tokenize file.xml file-tok.xml ./syll_parse.py file-tok.xml file-tok-syll.xml diff --git a/misc-utils/non-caching-server-3.7.py b/misc-utils/non-caching-server-3.7.py index e9d47cf7..a1859494 100755 --- a/misc-utils/non-caching-server-3.7.py +++ b/misc-utils/non-caching-server-3.7.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python # This script is copied and modified from # https://github.com/python/cpython/blob/3.7/Lib/http/server.py @@ -93,7 +93,7 @@ # refresh will not fetch manually updated pages. # # Running this script in a root web site folder is equivalent to running -# python3 -m http.server +# python -m http.server # in that folder, except that pages won't get cached. # # - Eric Joanis, 2021: diff --git a/misc-utils/non-caching-server-3.9.py b/misc-utils/non-caching-server-3.9.py index 07b92e16..3a740259 100755 --- a/misc-utils/non-caching-server-3.9.py +++ b/misc-utils/non-caching-server-3.9.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python # This script is copied and modified from # https://github.com/python/cpython/blob/3.9/Lib/http/server.py @@ -93,7 +93,7 @@ # refresh will not fetch manually updated pages. # # Running this script in a root web site folder is equivalent to running -# python3 -m http.server +# python -m http.server # in that folder, except that pages won't get cached. 
# # - Eric Joanis, 2021: diff --git a/misc-utils/syll_parse.py b/misc-utils/syll_parse.py index db3a5060..2a6579c6 100755 --- a/misc-utils/syll_parse.py +++ b/misc-utils/syll_parse.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python # Original Copyright and License from https://github.com/alexestes/SonoriPy: # diff --git a/package.json b/package.json deleted file mode 100644 index b153b672..00000000 --- a/package.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "name": "readalongs", - "version": "1.0.0", - "description": "", - "main": "index.js", - "scripts": { - "test": "echo \"Error: no test specified\" && exit 1" - }, - "husky": { - "hooks": { - "commit-msg": "commitlint -E HUSKY_GIT_PARAMS" - } - }, - "commitlint": { - "extends": [ - "@commitlint/config-conventional" - ] - }, - "repository": { - "type": "git", - "url": "git+https://github.com/ReadAlongs/Studio.git" - }, - "keywords": [], - "author": "", - "license": "", - "bugs": { - "url": "https://github.com/ReadAlongs/Studio/issues" - }, - "homepage": "https://github.com/ReadAlongs/Studio#readme", - "dependencies": { - "@commitlint/config-conventional": "^8.3.4", - "commitlint": "^8.3.5", - "husky": "^4.2.3" - } -} diff --git a/readalongs/_version.py b/readalongs/_version.py index 6db21f19..42a9f433 100644 --- a/readalongs/_version.py +++ b/readalongs/_version.py @@ -1 +1 @@ -__version__ = "0.2.20211122" +__version__ = "0.2.20220705" diff --git a/readalongs/align.py b/readalongs/align.py index 9b93ce0d..c9125f3d 100644 --- a/readalongs/align.py +++ b/readalongs/align.py @@ -4,13 +4,13 @@ import io import os import shutil +import sys from collections import defaultdict from dataclasses import dataclass from datetime import timedelta -from typing import Dict, List, Union +from typing import Any, Dict, Iterable, List, Optional, Sequence, Set, Tuple, Union import chevron -import regex as re import soundswallower from lxml import etree from pydub import AudioSegment @@ -42,7 +42,16 @@ from readalongs.text.make_package import create_web_component_html from readalongs.text.make_smil import make_smil from readalongs.text.tokenize_xml import tokenize_xml -from readalongs.text.util import parse_time, save_minimal_index_html, save_txt, save_xml +from readalongs.text.util import ( + get_word_text, + parse_time, + save_minimal_index_html, + save_txt, + save_xml, +) + +MODEL_DIR = os.path.join(os.path.dirname(__file__), "static", "model") +DEFAULT_ACOUSTIC_MODEL = "cmusphinx-en-us-5.2" @dataclass @@ -62,11 +71,13 @@ class WordSequence: words: List -def get_sequences(xml, xml_filename, unit="w", anchor="anchor") -> List[WordSequence]: +def get_sequences( + xml, xml_filename="memory", unit="w", anchor="anchor" +) -> List[WordSequence]: """Return the list of anchor-separated word sequences in xml Args: - xml (etree): xml structure in which to search for words and anchors + xml (etree.ElementTree): xml structure in which to search for words and anchors xml_filename (str): filename, used for error messages only unit (str): element tag of the word units anchor (str): element tag of the anchors @@ -154,37 +165,27 @@ def split_silences(words: List[dict], final_end, excluded_segments: List[dict]) _ = words.pop() -def align_audio( # noqa: C901 - xml_path, - audio_path, - unit="w", - bare=False, - config=None, - save_temps=None, - verbose_g2p_warnings=False, -): - """Align an XML input file to an audio file. 
+def parse_and_make_xml( + xml_path: str, + config: dict, + save_temps: Optional[str] = None, + verbose_g2p_warnings: Optional[bool] = False, + output_orthography: str = "eng-arpabet", +) -> etree.ElementTree: + """Parse XML input and run tokenization and G2P. Args: xml_path (str): Path to XML input file in TEI-like format - audio_path (str): Path to audio input. Must be in a format supported by ffmpeg - unit (str): Optional; Element to create alignments for, by default 'w' - bare (boolean): Optional; - If False, split silence into adjoining tokens (default) - If True, keep the bare tokens without adjoining silences. - config (object): Optional; ReadAlong-Studio configuration to use + config (dict): Optional; ReadAlong-Studio configuration to use save_temps (str): Optional; Save temporary files, by default None verbose_g2p_warnings (boolean): Optional; display all g2p errors and warnings iff True Returns: - Dict[str, List]: TODO + lxml.etree.ElementTree: Parsed and prepared XML Raises: - TODO - """ - results: Dict[str, List] = {"words": [], "audio": None} - + RuntimeError: If XML failed to parse""" # First do G2P try: xml = etree.parse(xml_path).getroot() @@ -192,220 +193,295 @@ def align_audio( # noqa: C901 raise RuntimeError( "Error parsing XML input file %s: %s." % (xml_path, e) ) from e - if config and "images" in config: + if "images" in config: xml = add_images(xml, config) - if config and "xml" in config: + if "xml" in config: xml = add_supplementary_xml(xml, config) xml = tokenize_xml(xml) - if save_temps: + if save_temps is not None: save_xml(save_temps + ".tokenized.xml", xml) - results["tokenized"] = xml = add_ids(xml) - if save_temps: + xml = add_ids(xml) + if save_temps is not None: save_xml(save_temps + ".ids.xml", xml) - xml, valid = convert_xml(xml, verbose_warnings=verbose_g2p_warnings) - if save_temps: + xml, valid = convert_xml( + xml, + verbose_warnings=verbose_g2p_warnings, + output_orthography=output_orthography, + ) + if save_temps is not None: save_xml(save_temps + ".g2p.xml", xml) if not valid: raise RuntimeError( "Some words could not be g2p'd correctly. Aborting. " - "Run with --g2p-verbose for more detailed g2p error logs." + "Run with --debug-g2p for more detailed g2p error logs." ) + return xml - # Prepare the SoundsSwallower (formerly PocketSphinx) configuration - cfg = soundswallower.Decoder.default_config() - model_path = soundswallower.get_model_path() - cfg.set_boolean("-remove_noise", False) - cfg.set_boolean("-remove_silence", False) - cfg.set_string("-hmm", os.path.join(model_path, "en-us")) - # cfg.set_string('-samprate', "no no") - cfg.set_float("-beam", 1e-100) - cfg.set_float("-wbeam", 1e-80) - # Read the audio file - audio = read_audio_from_file(audio_path) - audio = audio.set_channels(1).set_sample_width(2) - audio_length_in_ms = len(audio.raw_data) - # Downsampling is (probably) not necessary - cfg.set_float("-samprate", audio.frame_rate) +def create_asr_config( + config: dict, + audio: AudioSegment, + save_temps: Optional[str] = None, + debug_aligner: Optional[bool] = False, + alignment_mode: str = "auto", +) -> soundswallower.Config: + """Create the base SoundSwallower (formerly PocketSphinx) configuration. 
- # Process audio, silencing or removing any DNA segments - dna_segments = [] - removed_segments = [] - if config and "do-not-align" in config: - # Sort un-alignable segments and join overlapping ones - dna_segments = sort_and_join_dna_segments(config["do-not-align"]["segments"]) - method = config["do-not-align"].get("method", "remove") - # Determine do-not-align method - if method == "mute": - dna_method = mute_section - elif method == "remove": - dna_method = remove_section - else: - LOGGER.error("Unknown do-not-align method declared") - # Process audio and save temporary files - if method in ("mute", "remove"): - processed_audio = audio - # Process the DNA segments in reverse order so we don't have to correct - # for previously processed ones when using the "remove" method. - for seg in reversed(dna_segments): - processed_audio = dna_method( - processed_audio, int(seg["begin"]), int(seg["end"]) - ) - if save_temps: - _, ext = os.path.splitext(audio_path) - try: - processed_audio.export( - save_temps + "_processed" + ext, format=ext[1:] - ) - except CouldntEncodeError: - try: - os.remove(save_temps + "_processed" + ext) - except BaseException: - pass - LOGGER.warning( - f"Couldn't find encoder for '{ext[1:]}', defaulting to 'wav'" - ) - processed_audio.export(save_temps + "_processed" + ".wav") - removed_segments = dna_segments - audio_data = processed_audio + Args: + config (dict): ReadAlong-Studio configuration to use. + audio (AudioSegment): Audio input from which to take parameters. + save_temps (str): Optional; Prefix for saving temporary files, by default None. + debug_aligner (boolean): Optional; Output debugging info from the aligner. + alignment_mode (str): Optional; controls the decoder beam width + + Returns: + soundswallower.Config: Basic configuration.""" + asr_config = soundswallower.Config() + acoustic_model = config.get( + "acoustic_model", os.path.join(MODEL_DIR, DEFAULT_ACOUSTIC_MODEL) + ) + asr_config["hmm"] = acoustic_model + if alignment_mode == "strict": + asr_config["beam"] = 1e-100 + asr_config["pbeam"] = 1e-100 + asr_config["wbeam"] = 1e-80 + elif alignment_mode == "moderate": + asr_config["beam"] = 1e-200 + asr_config["pbeam"] = 1e-200 + asr_config["wbeam"] = 1e-160 + elif alignment_mode == "loose": + asr_config["beam"] = 0 + asr_config["pbeam"] = 0 + asr_config["wbeam"] = 0 else: - audio_data = audio + assert False, "invalid alignment_mode value" - # Initialize the SoundSwallower decoder with the sample rate from the audio - frame_points = int(cfg.get_float("-samprate") * cfg.get_float("-wlen")) + if debug_aligner: + # With --debug-aligner, we display the SoundSwallower logs on + # screen and set them to maximum strength + asr_config["loglevel"] = "DEBUG" + else: + # Otherwise, we enable logging and direct it to a file if + # saving temporary files + if save_temps is not None and (sys.platform not in ("win32", "cygwin")): + # With --save-temps, we save the SoundSwallower logs to a file. 
+ # This is buggy on Windows, so we don't do it on Windows variants + # (NOTE: should be fixed in SoundSwallower 0.3 though) + ss_log = save_temps + ".soundswallower.log" + asr_config["logfn"] = ss_log + asr_config["loglevel"] = "INFO" + # And otherwise the default is fine (only error messages are printed) + + # Set sampling rate based on audio (FIXME: this may cause problems + # later on if it is too low) + asr_config["samprate"] = audio.frame_rate + # Set the minimum FFT size (no longer necessary since + # SoundSwallower 0.2, but we keep this here for compatibility with + # old versions in case we need to debug things) + frame_points = int(asr_config["samprate"] * asr_config["wlen"]) fft_size = 1 while fft_size < frame_points: fft_size = fft_size << 1 - cfg.set_int("-nfft", fft_size) - frame_size = 1.0 / cfg.get_int("-frate") + asr_config["nfft"] = fft_size - # Note: the frames are typically 0.01s long (i.e., the frame rate is typically 100), - # while the audio segments manipulated using pydub are sliced and accessed in - # millisecond intervals. For audio segments, the ms slice assumption is hard-coded - # all over, while frames_to_time() is used to convert segment boundaries returned by - # soundswallower, which are indexes in frames, into durations in seconds. - def frames_to_time(frames): - return frames * frame_size + # Disable VAD + asr_config["remove_noise"] = False - # Extract the list of sequences of words in the XML - word_sequences = get_sequences(xml, xml_path, unit=unit) - end = 0 - for i, word_sequence in enumerate(word_sequences): + return asr_config - i_suffix = "" if i == 0 else "." + str(i + 1) - # Generate dictionary and FSG for the current sequence of words - dict_data = make_dict(word_sequence.words, xml_path, unit=unit) - if save_temps: - dict_file = io.open(save_temps + ".dict" + i_suffix, "wb") - else: - dict_file = PortableNamedTemporaryFile( - prefix="readalongs_dict_", delete=False - ) - dict_file.write(dict_data.encode("utf-8")) - dict_file.close() +def read_noisedict(asr_config: soundswallower.Config) -> Set[str]: + """Read the list of noise words from the acoustic model. - fsg_data = make_fsg(word_sequence.words, xml_path) - if save_temps: - fsg_file = io.open(save_temps + ".fsg" + i_suffix, "wb") - else: - fsg_file = PortableNamedTemporaryFile( - prefix="readalongs_fsg_", delete=False - ) - fsg_file.write(fsg_data.encode("utf-8")) - fsg_file.close() + Args: + asr_config (soundswallower.Config): ASR configuration. + Returns: + Set[str]: Set of noise words from noisedict, or a default set + if it could not be found. 
+ """ + try: + noisewords = set() + acoustic_model = asr_config["hmm"] + with open( + os.path.join(acoustic_model, "noisedict"), "rt", encoding="utf-8" + ) as dictfh: + for line in dictfh: + if line.startswith("##") or line.startswith(";;"): + continue + noisewords.add(line.strip().split()[0]) + except FileNotFoundError: + LOGGER.warning("Could not find noisedict, using defaults") + noisewords = {"", "[NOISE]"} - # Extract the part of the audio corresponding to this word sequence - audio_segment = extract_section( - audio_data, word_sequence.start, word_sequence.end - ) - if save_temps and audio_segment is not audio_data: - write_audio_to_file(audio_segment, save_temps + ".wav" + i_suffix) - - # Configure soundswallower for this sequence's dict and fsg - cfg.set_string("-dict", dict_file.name) - cfg.set_string("-fsg", fsg_file.name) - ps = soundswallower.Decoder(cfg) - # Align this word sequence - ps.start_utt() - ps.process_raw(audio_segment.raw_data, no_search=False, full_utt=True) - ps.end_utt() - - if not ps.seg(): - raise RuntimeError( - "Alignment produced no segments, " - "please examine dictionary and input audio and text." - ) + return noisewords - # List of removed segments for the sequence we are currently processing - curr_removed_segments = dna_union( - word_sequence.start, word_sequence.end, audio_length_in_ms, removed_segments - ) - prev_segment_count = len(results["words"]) - for seg in ps.seg(): - if seg.word in ("", "[NOISE]"): - continue - start = frames_to_time(seg.start_frame) - end = frames_to_time(seg.end_frame + 1) - # change to ms - start_ms = start * 1000 - end_ms = end * 1000 - if curr_removed_segments: - start_ms += calculate_adjustment(start_ms, curr_removed_segments) - end_ms += calculate_adjustment(end_ms, curr_removed_segments) - start_ms, end_ms = correct_adjustments( - start_ms, end_ms, curr_removed_segments - ) - # change back to seconds to write to smil - start = start_ms / 1000 - end = end_ms / 1000 - results["words"].append({"id": seg.word, "start": start, "end": end}) - LOGGER.info("Segment: %s (%.3f : %.3f)", seg.word, start, end) - aligned_segment_count = len(results["words"]) - prev_segment_count - if aligned_segment_count != len(word_sequence.words): - LOGGER.warning( - f"Word sequence {i+1} had {len(word_sequence.words)} tokens " - f"but produced {aligned_segment_count} segments. " - "Check that the anchors are well positioned or " - "that the audio corresponds to the text." - ) - final_end = end +def process_dna( + dna_config: Dict[str, Any], + audio: AudioSegment, + audio_path: Optional[str] = None, + save_temps: Optional[str] = None, +) -> Tuple[AudioSegment, List[dict], List[dict]]: + """Apply do-not-align processing to audio. - if len(results["words"]) == 0: - raise RuntimeError( - "Alignment produced only noise or silence segments, " - "please verify that the text is an actual transcript of the audio." - ) - if len(results["words"]) != len(results["tokenized"].xpath("//" + unit)): - LOGGER.warning( - "Alignment produced a different number of segments and tokens than " - "were in the input. Sequences between some anchors probably did not " - "align successfully. Look for more anchors-related warnings above in the log." - ) + Args: + dna_config (dict): Do-not-align configuration, containing at least "segments" and "method". + audio (AudioSegment): Original audio segment. + audio_path (str): Optional; Path from which audio was loaded (needed for save_temps). + save_temps (str): Optional; Prefix for saving temporary files, by default None. 
-    if not bare:
-        # Take all the boundaries (anchors) around segments and add them as DNA
-        # segments for the purpose of splitting silences
-        dna_for_silence_splitting = copy.deepcopy(dna_segments)
-        last_end = None
-        for seq in word_sequences:
-            if last_end or seq.start:
-                dna_for_silence_splitting.append(
-                    {"begin": (last_end or seq.start), "end": (seq.start or last_end)}
+    Returns:
+        Tuple[AudioSegment, List[dict], List[dict]]: Processed audio
+        segment, list of segments marked do-not-align, list of segments
+        actually removed.
+    """
+    # Sort un-alignable segments and join overlapping ones
+    dna_segments = sort_and_join_dna_segments(dna_config["segments"])
+    method = dna_config.get("method", "remove")
+    # Determine do-not-align method
+    if method == "mute":
+        dna_method = mute_section
+    elif method == "remove":
+        dna_method = remove_section
+    else:
+        LOGGER.error("Unknown do-not-align method declared")
+    # Process audio and save temporary files
+    if method in ("mute", "remove"):
+        processed_audio = audio
+        # Process the DNA segments in reverse order so we don't have to correct
+        # for previously processed ones when using the "remove" method.
+        for dna_seg in reversed(dna_segments):
+            processed_audio = dna_method(
+                processed_audio, int(dna_seg["begin"]), int(dna_seg["end"])
+            )
+        if save_temps is not None:
+            assert audio_path is not None
+            _, ext = os.path.splitext(audio_path)
+            try:
+                processed_audio.export(save_temps + "_processed" + ext, format=ext[1:])
+            except CouldntEncodeError:
+                try:
+                    os.remove(save_temps + "_processed" + ext)
+                except BaseException:  # Ignore Windows file removal failures
+                    pass
+                LOGGER.warning(
+                    f"Couldn't find encoder for '{ext[1:]}', defaulting to 'wav'"
+                )
-            last_end = seq.end
-        if last_end:
-            dna_for_silence_splitting.append({"begin": last_end, "end": last_end})
-        dna_for_silence_splitting = sort_and_join_dna_segments(
-            dna_for_silence_splitting
-        )
+                processed_audio.export(save_temps + "_processed" + ".wav")
+    removed_segments = dna_segments
+    return processed_audio, dna_segments, removed_segments
+
+
+def align_sequence(
+    audio_data: AudioSegment,
+    word_sequence: WordSequence,
+    asr_config: soundswallower.Config,
+    xml_path: str,
+    i: int,
+    unit: Optional[str] = "w",
+    save_temps: Optional[str] = None,
+) -> Iterable[soundswallower.Seg]:
+    """Run alignment for a word sequence.
-
-        split_silences(results["words"], final_end, dna_for_silence_splitting)
+
+    Args:
+        audio_data (AudioSegment): Full input audio.
+        word_sequence (WordSequence): Sequence of units to align.
+        asr_config (soundswallower.Config): Aligner configuration.
+        unit (str): Name of unit we are aligning.
+        xml_path (str): Path to input XML file.
+        i (int): Index of this sequence in the full file.
+
+        save_temps (str): Optional; Prefix for saving temporary files,
+        or None to not save them.
+
+    Returns:
+        Iterable[soundswallower.Seg]: Word (or other unit) alignments.
+
+    Raises:
+        RuntimeError: If alignment fails (TODO: figure out why).
+    """
+    i_suffix = "" if i == 0 else "."
+ str(i + 1) + + # Generate dictionary and FSG for the current sequence of words + dict_data = make_dict(word_sequence.words, xml_path, unit=unit) + if save_temps is not None: + dict_file = io.open(save_temps + ".dict" + i_suffix, "wb") + else: + dict_file = PortableNamedTemporaryFile(prefix="readalongs_dict_", delete=True) + dict_file.write(dict_data.encode("utf-8")) + dict_file.close() + + fsg_data = make_fsg(word_sequence.words, xml_path) + if save_temps is not None: + fsg_file = io.open(save_temps + ".fsg" + i_suffix, "wb") + else: + fsg_file = PortableNamedTemporaryFile(prefix="readalongs_fsg_", delete=True) + fsg_file.write(fsg_data.encode("utf-8")) + fsg_file.close() + + # Extract the part of the audio corresponding to this word sequence + audio_segment = extract_section(audio_data, word_sequence.start, word_sequence.end) + if save_temps is not None and audio_segment is not audio_data: + write_audio_to_file(audio_segment, save_temps + ".wav" + i_suffix) + + # Configure soundswallower for this sequence's dict and fsg + asr_config["dict"] = dict_file.name + asr_config["fsg"] = fsg_file.name + + ps = soundswallower.Decoder(asr_config) + # Align this word sequence + ps.start_utt() + ps.process_raw(audio_segment.raw_data, no_search=False, full_utt=True) + ps.end_utt() + + return ps.seg + + +def process_segmentation( + segmentation: Iterable[soundswallower.Seg], + curr_removed_segments: List[dict], + noisewords: Set[str], + frame_size: float, + debug_aligner: Optional[bool] = False, +) -> List[Dict[str, Any]]: + """Correct output alignments based on do-not-align segments.""" + aligned_words: List[Dict[str, Any]] = [] + for word_seg in segmentation: + if word_seg.text in noisewords: + continue + start = word_seg.start + end = word_seg.start + word_seg.duration + # round to milliseconds to avoid imprecisions + start_ms = round(start * 1000) + end_ms = round(end * 1000) + # possibly adjust for removed sections + if curr_removed_segments: + start_ms += calculate_adjustment(start_ms, curr_removed_segments) + end_ms += calculate_adjustment(end_ms, curr_removed_segments) + start_ms, end_ms = correct_adjustments( + start_ms, end_ms, curr_removed_segments + ) + # change back to seconds + start = start_ms / 1000 + end = end_ms / 1000 + if aligned_words: + assert start >= aligned_words[-1]["end"] + aligned_words.append({"id": word_seg.text, "start": start, "end": end}) + if debug_aligner: + LOGGER.info("Segment: %s (%.3f : %.3f)", word_seg.text, start, end) + return aligned_words + + +def insert_silence( + results: Dict[str, Any], + audio: AudioSegment, + xml_path: Optional[str] = "XML Input", +): + """Insert the required silences in the audio stream.""" words_dict = { x["id"]: {"start": x["start"], "end": x["end"]} for x in results["words"] } - silence_offsets = defaultdict(int) + silence_offsets: defaultdict = defaultdict(int) silence = 0 if results["tokenized"].xpath("//silence"): endpoint = 0 @@ -446,11 +522,329 @@ def frames_to_time(frames): word["start"] += silence_offsets[word["id"]] word["end"] += silence_offsets[word["id"]] results["audio"] = audio + + +def align_audio( + xml_path: str, + audio_path: str, + *, # force the remaining arguments to be passed by name + unit: Optional[str] = "w", + bare: Optional[bool] = False, + config: Optional[dict] = None, + save_temps: Optional[str] = None, + verbose_g2p_warnings: Optional[bool] = False, + debug_aligner: Optional[bool] = False, + output_orthography: str = "eng-arpabet", + alignment_mode: str = "auto", +): + """Align an XML input file to an 
audio file. + + Args: + xml_path (str): Path to XML input file in TEI-like format + audio_path (str): Path to audio input. Must be in a format supported by ffmpeg + unit (str): Optional; Element to create alignments for, by default 'w' + bare (boolean): Optional; + If False, split silence into adjoining tokens (default) + If True, keep the bare tokens without adjoining silences. + config (dict): Optional; ReadAlong-Studio configuration to use + save_temps (str): Optional; Prefix for saving temporary files, or None if + temporary files are not to be saved. + verbose_g2p_warnings (boolean): Optional; display all g2p errors and warnings + iff True + debug_aligner (boolean): Optional, output debugging info from the aligner. + alignment_mode (str): Optional, controls the decoder beam width + + Returns: + Dict[str, Any]: TODO + + Raises: + TODO + """ + results: Dict[str, Any] = {"words": [], "audio": None} + if config is None: + config = {} + + xml = parse_and_make_xml( + xml_path=xml_path, + config=config, + verbose_g2p_warnings=verbose_g2p_warnings, + save_temps=save_temps, + output_orthography=output_orthography, + ) + results["tokenized"] = xml + + # Read the audio file + audio = read_audio_from_file(audio_path) + audio = audio.set_channels(1).set_sample_width(2) + audio_length_in_ms = len(audio.raw_data) + + # Expand the list of alignment modes to try + if alignment_mode == "auto": + align_modes = ["strict", "moderate", "loose"] + else: + align_modes = [alignment_mode] + + # Create the ASR configuration for each alignment mode needed + asr_configs = [ + create_asr_config(config, audio, save_temps, debug_aligner, align_mode) + for align_mode in align_modes + ] + asr_config = asr_configs[0] # Default/first ASR Config + + # Process audio, silencing or removing any DNA segments + if "do-not-align" in config: + audio_data, dna_segments, removed_segments = process_dna( + dna_config=config["do-not-align"], + audio=audio, + audio_path=audio_path, + save_temps=save_temps, + ) + else: + audio_data = audio + dna_segments = [] + removed_segments = [] + + # Note: the frames are typically 0.01s long (i.e., the frame rate is typically 100), + # while the audio segments manipulated using pydub are sliced and accessed in + # millisecond intervals. For audio segments, the ms slice assumption is hard-coded + # all over, while frame_size is used to convert segment boundaries returned by + # soundswallower, which are indexes in frames, into durations in seconds. 
+ frame_size = 1.0 / asr_config["frate"] + + # Get list of words to ignore in aligner output + noisewords = read_noisedict(asr_config) + + # Extract the list of sequences of words in the XML + word_sequences = get_sequences(xml, xml_path, unit=unit) + final_end = 0.0 + for i, word_sequence in enumerate(word_sequences): + for j, cur_asr_config in enumerate(asr_configs): + # Run the aligner on this sequence + segmentation = align_sequence( + audio_data=audio_data, + word_sequence=word_sequence, + asr_config=cur_asr_config, + xml_path=xml_path, + i=i, + unit=unit, + save_temps=save_temps, + ) + + # List of removed segments for the sequence we are currently processing + curr_removed_segments = dna_union( + word_sequence.start, + word_sequence.end, + audio_length_in_ms, + removed_segments, + ) + # Process raw segmentation, adjusting alignments for DNA + aligned_words = process_segmentation( + segmentation=segmentation, + curr_removed_segments=curr_removed_segments, + noisewords=noisewords, + frame_size=frame_size, + debug_aligner=debug_aligner, + ) + + if len(aligned_words) != len(word_sequence.words): + LOGGER.warning(f"Align mode {align_modes[j]} failed for sequence {i}.") + else: + LOGGER.info(f"Align mode {align_modes[j]} succeeded for sequence {i}.") + break + + results["words"].extend(aligned_words) + if aligned_words: + final_end = aligned_words[-1]["end"] + if len(aligned_words) != len(word_sequence.words): + LOGGER.warning( + f"Word sequence {i+1} had {len(word_sequence.words)} tokens " + f"but produced {len(aligned_words)} segments. " + "Check that the anchors are well positioned or " + "that the audio corresponds to the text." + ) + + aligned_segment_count = len(results["words"]) + token_count = len(results["tokenized"].xpath(f"//{unit}")) + LOGGER.info(f"Number of words found: {token_count}") + LOGGER.info(f"Number of aligned segments: {aligned_segment_count}") + + if aligned_segment_count == 0: + raise RuntimeError( + "Alignment produced only noise or silence segments, " + "please verify that the text is an actual transcript of the audio." + ) + if aligned_segment_count != token_count: + LOGGER.warning( + "Alignment produced a different number of segments and tokens than " + "were in the input. Sequences between some anchors probably did not " + "align successfully. Look for more anchors-related warnings above in the log." 
+        )
+
+    # Split silences if requested
+    if not bare:
+        # Take all the boundaries (anchors) around segments and add them as DNA
+        # segments for the purpose of splitting silences
+        dna_for_silence_splitting = copy.deepcopy(dna_segments)
+        last_end = None
+        for seq in word_sequences:
+            if last_end or seq.start:
+                dna_for_silence_splitting.append(
+                    {"begin": (last_end or seq.start), "end": (seq.start or last_end)}
+                )
+            last_end = seq.end
+        if last_end:
+            dna_for_silence_splitting.append({"begin": last_end, "end": last_end})
+        dna_for_silence_splitting = sort_and_join_dna_segments(
+            dna_for_silence_splitting
+        )
+        split_silences(results["words"], final_end, dna_for_silence_splitting)
+
+    # Insert silences if requested
+    insert_silence(
+        results=results,
+        audio=audio,
+        xml_path=xml_path,
+    )
     return results
 
 
-def save_readalong(  # noqa C901
-    # noqa C901 - ignore the complexity of this function
+def get_audio_duration(audiofile: str) -> float:
+    """Return the duration of audiofile in seconds"""
+    audio = read_audio_from_file(audiofile)
+    return audio.frame_count() / audio.frame_rate
+
+
+def save_label_files(
+    words: List[dict],
+    tokenized_xml: etree.ElementTree,
+    duration: float,
+    output_base: str,
+    output_formats: Iterable[str],
+):
+    """Save label (TextGrid and/or EAF) files.
+
+    Args:
+        words: list of words with "id", "start" and "end"
+        tokenized_xml: tokenized or g2p'd parsed XML object
+        duration: length of the audio in seconds
+        output_base (str): Base path for output files
+        output_formats (Iterable[str]): List of output formats
+
+    Raises:
+        IndexError: words and tokenized_xml have inconsistent IDs
+        Exception: TODO, not sure what else this can raise
+    """
+    words_with_text, sentences = get_word_texts_and_sentences(words, tokenized_xml)
+    textgrid = create_text_grid(words_with_text, sentences, duration)
+
+    if "textgrid" in output_formats:
+        textgrid.to_file(output_base + ".TextGrid")
+
+    if "eaf" in output_formats:
+        textgrid.to_eaf().to_file(output_base + ".eaf")
+
+
+def save_subtitles(
+    words: List[dict],
+    tokenized_xml: etree.ElementTree,
+    output_base: str,
+    output_formats: Iterable[str],
+):
+    """Save subtitle (SRT and/or VTT) files.
+
+    Args:
+        words: list of words with "id", "start" and "end"
+        tokenized_xml: tokenized or g2p'd parsed XML object
+        output_base (str): Base path for output files
+        output_formats (Iterable[str]): List of output formats
+
+    Raises:
+        IndexError: words and tokenized_xml have inconsistent IDs
+        Exception: TODO, not sure what else this can raise
+    """
+    words_with_text, sentences = get_word_texts_and_sentences(words, tokenized_xml)
+    cc_sentences = write_to_subtitles(sentences)
+    cc_words = write_to_subtitles(words_with_text)
+
+    if "srt" in output_formats:
+        cc_sentences.save_as_srt(output_base + "_sentences.srt")
+        cc_words.save_as_srt(output_base + "_words.srt")
+
+    if "vtt" in output_formats:
+        cc_words.save(output_base + "_words.vtt")
+        cc_sentences.save(output_base + "_sentences.vtt")
+
+
+def save_audio(
+    audiofile: str, output_base: str, audiosegment: Optional[AudioSegment] = None
+) -> str:
+    """Save audio file.
+
+    Args:
+        audiofile (str): Path to input audio
+        output_base (str): Base path for output files
+        audiosegment (AudioSegment): Optional; trimmed/muted audio
+    Returns:
+        str: Path to output audio file.
+ """ + _, audio_ext = os.path.splitext(audiofile) + audio_path = output_base + audio_ext + audio_format = audio_ext[1:] + if audiosegment is not None: + if audio_format in ["m4a", "aac"]: + audio_format = "ipod" + try: + audiosegment.export(audio_path, format=audio_format) + except CouldntEncodeError: + LOGGER.warning( + f"The audio file at {audio_path} could \ + not be exported in the {audio_format} format. \ + Please ensure your installation of ffmpeg has \ + the necessary codecs." + ) + audio_path = output_base + ".wav" + audiosegment.export(audio_path, format="wav") + else: + shutil.copy(audiofile, audio_path) + return audio_path + + +def save_images(config: Dict[str, Any], output_dir: str): + """Save image files specified in config. + + Args: + config (dict): ReadAlong-Studio configuration + output_dir (str): Output directory + Raises: + FileExistsError: If output directory already exists + """ + assets_dir = os.path.join(output_dir, "assets") + try: + os.mkdir(assets_dir) + except FileExistsError: + if not os.path.isdir(assets_dir): + raise + for _, image in config["images"].items(): + if image[0:4] == "http": + LOGGER.warning( + f"Please make sure {image} is accessible to clients using your read-along." + ) + else: + try: + shutil.copy(image, assets_dir) + except Exception as e: + LOGGER.warning( + f"Please copy {image} to {assets_dir} before deploying your read-along. ({e})" + ) + if os.path.basename(image) != image: + LOGGER.warning( + f"Read-along images were tested with absolute urls (starting with http(s):// " + f"and filenames without a path. {image} might not work as specified." + ) + + +def save_readalong( # this * forces all arguments to be passed by name, because I don't want any # code to depend on their order in the future *, @@ -482,6 +876,9 @@ def save_readalong( # noqa C901 Raises: [TODO] """ + if config is None: + config = {} + # Round all times to three digits, anything more is excess precision # poluting the output files, and usually due to float rounding errors anyway. 
for w in align_results["words"]: @@ -491,31 +888,23 @@ def save_readalong( # noqa C901 output_base = os.path.join(output_dir, output_basename) # Create textgrid object if outputting to TextGrid or eaf - if "TextGrid" in output_formats or "eaf" in output_formats: - audio = read_audio_from_file(audiofile) - duration = audio.frame_count() / audio.frame_rate - words, sentences = return_words_and_sentences(align_results) - textgrid = write_to_text_grid(words, sentences, duration) - - if "TextGrid" in output_formats: - textgrid.to_file(output_base + ".TextGrid") - - if "eaf" in output_formats: - textgrid.to_eaf().to_file(output_base + ".eaf") + if "textgrid" in output_formats or "eaf" in output_formats: + save_label_files( + words=align_results["words"], + tokenized_xml=align_results["tokenized"], + duration=get_audio_duration(audiofile), + output_base=output_base, + output_formats=output_formats, + ) # Create webvtt object if outputting to vtt or srt if "srt" in output_formats or "vtt" in output_formats: - words, sentences = return_words_and_sentences(align_results) - cc_sentences = write_to_subtitles(sentences) - cc_words = write_to_subtitles(words) - - if "srt" in output_formats: - cc_sentences.save_as_srt(output_base + "_sentences.srt") - cc_words.save_as_srt(output_base + "_words.srt") - - if "vtt" in output_formats: - cc_words.save(output_base + "_words.vtt") - cc_sentences.save(output_base + "_sentences.vtt") + save_subtitles( + words=align_results["words"], + tokenized_xml=align_results["tokenized"], + output_base=output_base, + output_formats=output_formats, + ) tokenized_xml_path = output_base + ".xml" save_xml(tokenized_xml_path, align_results["tokenized"]) @@ -525,38 +914,30 @@ def save_readalong( # noqa C901 tokenized_xhtml_path = output_base + ".xhtml" save_xml(tokenized_xhtml_path, align_results["tokenized"]) - _, audio_ext = os.path.splitext(audiofile) - audio_path = output_base + audio_ext - audio_format = audio_ext[1:] - if audiosegment: - if audio_format in ["m4a", "aac"]: - audio_format = "ipod" - try: - audiosegment.export(audio_path, format=audio_format) - except CouldntEncodeError: - LOGGER.warning( - f"The audio file at {audio_path} could \ - not be exported in the {audio_format} format. \ - Please ensure your installation of ffmpeg has \ - the necessary codecs." 
- ) - audio_path = output_base + ".wav" - audiosegment.export(audio_path, format="wav") - else: - shutil.copy(audiofile, audio_path) + audio_path = save_audio( + audiofile=audiofile, output_base=output_base, audiosegment=audiosegment + ) smil_path = output_base + ".smil" smil = make_smil( os.path.basename(tokenized_xml_path), os.path.basename(audio_path), - align_results, + align_results["words"], ) save_txt(smil_path, smil) if "html" in output_formats: html_out_path = output_base + ".html" - html_out = create_web_component_html(tokenized_xml_path, smil_path, audio_path) - with open(html_out_path, "w") as f: + html_out = create_web_component_html( + tokenized_xml_path, + smil_path, + audio_path, + config.get("title", "Title goes here"), + config.get("header", "Header goes here"), + config.get("subheader", ""), + config.get("theme", "light"), + ) + with open(html_out_path, "w", encoding="utf-8") as f: f.write(html_out) save_minimal_index_html( @@ -564,103 +945,86 @@ def save_readalong( # noqa C901 os.path.basename(tokenized_xml_path), os.path.basename(smil_path), os.path.basename(audio_path), + config.get("title", "Title goes here"), + config.get("header", "Header goes here"), + config.get("subheader", ""), + config.get("theme", "light"), ) # Copy the image files to the output's asset directory, if any are found - if config and "images" in config: - assets_dir = os.path.join(output_dir, "assets") - try: - os.mkdir(assets_dir) - except FileExistsError: - if not os.path.isdir(assets_dir): - raise - for _, image in config["images"].items(): - if image[0:4] == "http": - LOGGER.warning( - f"Please make sure {image} is accessible to clients using your read-along." - ) - else: - try: - shutil.copy(image, assets_dir) - except Exception as e: - LOGGER.warning( - f"Please copy {image} to {assets_dir} before deploying your read-along. ({e})" - ) - if os.path.basename(image) != image: - LOGGER.warning( - f"Read-along images were tested with absolute urls (starting with http(s):// " - f"and filenames without a path. {image} might not work as specified." 
-                    )
+    if "images" in config:
+        save_images(config=config, output_dir=output_dir)
 
 
-def return_word_from_id(xml: etree, el_id: str) -> str:
-    """Given an XML document, return the innertext at id
+def get_word_element(xml: etree.ElementTree, el_id: str) -> etree.ElementTree:
+    """Get the xml etree for a given word by its id"""
+    return xml.xpath(f'//w[@id="{el_id}"]')[0]
 
-    Args:
-        xml (etree): XML document
-        el_id (str): ID
 
-    Returns:
-        str: Innertext of element with el_id in xml
-    """
-    return xml.xpath('//*[@id="%s"]/text()' % el_id)[0]
+def get_ancestor_sent_el(word_el: etree.ElementTree) -> Union[None, etree.ElementTree]:
+    """Get the ancestor <s> node for word_el, or None"""
+    while word_el is not None and word_el.tag != "s":
+        word_el = word_el.getparent()
+    return word_el
 
 
-def return_words_and_sentences(results):
-    """Parse xml into word and sentence 'tier' data
+def get_word_texts_and_sentences(
+    words: List[dict], tokenized_xml: etree.ElementTree
+) -> Tuple[List[dict], List[List[dict]]]:
+    """Parse xml into word and sentence 'tier' data with full textual words
 
     Args:
-        results([TODO type]): [TODO description]
+        words: list of words with "id", "start" and "end"
+        tokenized_xml: tokenized or g2p'd parsed XML object
 
     Returns:
-        [TODO type]: [TODO description]
+        list of words, list of sentences (as a list of lists of words)
+        The returned words are dicts containing:
+            "text": the actual textual word from the XML (not the ID)
+            "start": start time
+            "end": end time
     """
-    result_id_pattern = re.compile(
-        r"""
-        t(?P<table>\d*)            # Table
-        b(?P<body>\d*)             # Body
-        d(?P<div>
\d*)             # Div ( Break )
-        p(?P<par>\d*)              # Paragraph
-        s(?P<sent>\d+)             # Sentence
-        w(?P<word>\d+)             # Word
-        """,
-        re.VERBOSE,
-    )
-
-    all_els = results["words"]
-    xml = results["tokenized"]
     sentences = []
-    words = []
-    all_words = []
-    current_sent = 0
-    for el in all_els:
-        parsed = re.search(result_id_pattern, el["id"])
-        sent_i = parsed.group("sent")
-        if int(sent_i) is not current_sent:
-            sentences.append(words)
-            words = []
-            current_sent += 1
-        word = {
-            "text": return_word_from_id(xml, el["id"]),
-            "start": el["start"],
-            "end": el["end"],
+    sent_words: List[Dict[str, Any]] = []
+    all_words: List[Dict[str, Any]] = []
+    prev_sent_el = None
+    for word in words:
+        # The sentence is considered the set of words under the same <s> element.
+        # A word that's not under any <s> element is bad input, but we consider
+        # it a sentence by itself for software robustness.
+        word_el = get_word_element(tokenized_xml, word["id"])
+        sent_el = get_ancestor_sent_el(word_el)
+        if prev_sent_el is None or sent_el is not prev_sent_el:
+            if sent_words:
+                sentences.append(sent_words)
+            sent_words = []
+            prev_sent_el = sent_el
+        word_with_text = {
+            "text": get_word_text(word_el),
+            "start": word["start"],
+            "end": word["end"],
         }
-        words.append(word)
-        all_words.append(word)
-    sentences.append(words)
+        if all_words:
+            assert word_with_text["start"] >= all_words[-1]["end"]
+        sent_words.append(word_with_text)
+        all_words.append(word_with_text)
+    if sent_words:
+        sentences.append(sent_words)
     return all_words, sentences
 
 
-def write_to_text_grid(words: List[dict], sentences: List[dict], duration: float):
-    """Write results to Praat TextGrid. Because we are using pympi, we can also export to Elan EAF.
+def create_text_grid(
+    words: List[dict], sentences: List[List[dict]], duration: float
+) -> TextGrid:
+    """Create Praat TextGrid from results. Because we are using pympi, we can also export to Elan EAF.
 
     Args:
-        words (List[dict]): List of word times containing start, end, and value keys
-        sentences (List[dict]): List of sentence times containing start, end, and value keys
+        words (List[dict]): List of words containing "text", "start", "end"
+        sentences (List[dict]): List of sentences (as a list of lists of word dicts)
         duration (float): duration of entire audio
 
     Returns:
-        TextGrid: Praat TextGrid with word and sentence alignments
+        TextGrid: Praat TextGrid object with word and sentence alignments
     """
     text_grid = TextGrid(xmax=duration)
     sentence_tier = text_grid.add_tier(name="Sentence")
@@ -757,9 +1121,6 @@ def convert_to_xhtml(tokenized_xml, title="Book"):
 
 TEI_TEMPLATE = """
-
 {{#pages}}
@@ -779,13 +1140,56 @@ def convert_to_xhtml(tokenized_xml, title="Book"):
 """
 
 
+def create_tei_from_text(lines: Iterable[str], text_languages: Sequence[str]) -> str:
+    """Create input xml in TEI standard.
+    Uses the line sequence to infer paragraph and sentence structure from plain text:
+    Assumes a double blank line marks a page break, and a single blank line
+    marks a paragraph break.
+    Creates the XML using chevron.
+
+    Args:
+        lines: lines from the input plain text, e.g., f.readlines() on file handle f
+        text_languages: non-empty list of languages for g2p conversion
+
+    Returns:
+        str: Formatted XML, ready to print
+    """
+    assert text_languages, "The text_languages list may not be empty."
+    kwargs = {
+        "main_lang": text_languages[0],
+        "fallback_langs": ",".join(text_languages[1:]),
+    }
+    pages: List[dict] = []
+    paragraphs: List[dict] = []
+    sentences: List[str] = []
+    for line in lines:
+        stripped_line = line.strip()
+        if stripped_line == "":
+            if not sentences:
+                # consider this a page break (unless at the beginning)
+                pages.append({"paragraphs": paragraphs})
+                paragraphs = []
+            else:
+                # add sentences and begin new paragraph
+                paragraphs.append({"sentences": sentences})
+                sentences = []
+        else:
+            # Add text to sentence
+            sentences.append(stripped_line)
+    # Add the last paragraph/sentence
+    if sentences:
+        paragraphs.append({"sentences": sentences})
+    if paragraphs:
+        pages.append({"paragraphs": paragraphs})
+    return chevron.render(TEI_TEMPLATE, {**kwargs, **{"pages": pages}})
+
+
 def create_input_tei(**kwargs):
     """Create input xml in TEI standard.
     Uses readlines to infer paragraph and sentence structure from plain text.
-    TODO: Check if path, if it's just plain text, then render that instead of reading from the file
     Assumes a double blank line marks a page break, and a single blank line
     marks a paragraph break.
-    Outputs to uft-8 XML using pymustache.
+    Outputs to utf-8 XML using chevron.
 
     Args:
         **kwargs: dict containing these arguments:
@@ -806,7 +1210,7 @@ def create_input_tei(**kwargs):
     try:
         if kwargs.get("input_file_name", False):
             filename = kwargs["input_file_name"]
-            with io.open(kwargs["input_file_name"], encoding="utf8") as f:
+            with io.open(kwargs["input_file_name"], encoding="utf-8-sig") as f:
                 text = f.readlines()
         elif kwargs.get("input_file_handle", False):
             filename = kwargs["input_file_handle"].name
@@ -822,14 +1226,11 @@ def create_input_tei(**kwargs):
 
     text_langs = kwargs.get("text_languages", None)
     assert text_langs and isinstance(text_langs, (list, tuple)), "need text_languages"
-    kwargs["main_lang"] = text_langs[0]
-    kwargs["fallback_langs"] = ",".join(text_langs[1:])
-
-    save_temps = kwargs.get("save_temps", False)
+    save_temps = kwargs.get("save_temps", None)
     if kwargs.get("output_file", False):
         filename = kwargs.get("output_file")
         outfile = io.open(filename, "wb")
-    elif save_temps:
+    elif save_temps is not None:
         filename = save_temps + ".input.xml"
         outfile = io.open(filename, "wb")
     else:
@@ -837,28 +1238,7 @@ def create_input_tei(**kwargs):
             prefix="readalongs_xml_", suffix=".xml", delete=True
         )
         filename = outfile.name
-    pages = []
-    paragraphs = []
-    sentences = []
-    for line in text:
-        if line == "\n":
-            if not sentences:
-                # consider this a page break (unless at the beginning)
-                pages.append({"paragraphs": paragraphs})
-                paragraphs = []
-            else:
-                # add sentences and begin new paragraph
-                paragraphs.append({"sentences": sentences})
-                sentences = []
-        else:
-            # Add text to sentence
-            sentences.append(line.strip())
-    # Add the last paragraph/sentence
-    if sentences:
-        paragraphs.append({"sentences": sentences})
-    if paragraphs:
-        pages.append({"paragraphs": paragraphs})
-    xml = chevron.render(TEI_TEMPLATE, {**kwargs, **{"pages": pages}})
+    xml = create_tei_from_text(text, text_langs)
     outfile.write(xml.encode("utf-8"))
     outfile.flush()
     outfile.close()
diff --git a/readalongs/api.py b/readalongs/api.py
new file mode 100644
index 00000000..c77454de
--- /dev/null
+++ b/readalongs/api.py
@@ -0,0 +1,159 @@
+"""
+api.py: API for calling readalongs CLI commands programmatically
+
+In this API, functions take the same arguments as on the readalongs
+command-line interface. The mapping between CLI options and API options is
+that the first long variant of an option described in "readalongs -h" is
+the API option name, with hyphens replaced by underscores.
+
+Example from readalongs align -h:
+    option in CLI                       option in API
+    ================================    =================================
+    -l, --language, --languages TEXT    language=["l1", "l2"]
+    -f, --force-overwrite               force_overwrite=True
+    -c, --config PATH                   config=os.path.join("some", "path", "config.json")
+                                        OR config=pathlib.Path("/some/path/config.json")
+
+As shown above, file names can be constructed using os.path.join() or a Path
+class like pathlib.Path. Warning: don't just use "/some/path/config.json"
+because that is not portable across platforms.
+
+Options that can be specified multiple times on the CLI should be provided as a
+list to the API methods.
+
+All API functions return the following tuple: (status, exception, log)
+ - status: 0 for OK, non-0 for Error
+ - exception: any exception caught, one of:
+   - click.BadParameter: when there is an error with the combination of parameters given
+   - click.UsageError: when the alignment task requested cannot be completed
+   - other exceptions: something else unexpected went wrong. Please report this as
+                       a bug at https://github.com/ReadAlongs/Studio/issues if
+                       you come across such an exception and you believe the
+                       problem is not in your own code.
+ - log: any logging messages issued during execution
+"""
+
+import io
+import logging
+from typing import Optional, Tuple
+
+import click
+
+from readalongs import cli
+from readalongs.log import LOGGER
+from readalongs.util import JoinerCallbackForClick, get_langs_deferred
+
+
+def align(
+    textfile, audiofile, output_base, language=(), output_formats=(), **kwargs
+) -> Tuple[int, Optional[Exception], str]:
+    """Run the "readalongs align" command from within a Python script.
+
+    Args:
+        textfile (str | Path): input text file (XML or plain text)
+        audiofile (str | Path): input audio file (format supported by ffmpeg)
+        output_base (str | Path): basename for output files
+        language (List[str]): Specify only if textfile is plain text;
+            list of languages for g2p and g2p cascade
+        save_temps (bool): Optional; whether to save temporary files
+
+    Run "readalongs align -h" or consult
+    https://readalong-studio.readthedocs.io/en/latest/cli-ref.html#readalongs-align
+    for the full list of arguments and their meaning.
+ + Returns: (status, exception, log_text) + """ + + logging_stream = io.StringIO() + logging_handler = logging.StreamHandler(logging_stream) + try: + # Capture the logs + LOGGER.addHandler(logging_handler) + + align_args = {param.name: param.default for param in cli.align.params} + if language: + language = JoinerCallbackForClick(get_langs_deferred())( + value_groups=language + ) + if output_formats: + output_formats = JoinerCallbackForClick( + cli.SUPPORTED_OUTPUT_FORMATS, drop_case=True + )(value_groups=output_formats) + + align_args.update( + textfile=textfile, + audiofile=audiofile, + output_base=output_base, + language=language, + output_formats=output_formats, + **kwargs + ) + + cli.align.callback(**align_args) # type: ignore + + return (0, None, logging_stream.getvalue()) + except Exception as e: + return (1, e, logging_stream.getvalue()) + finally: + # Remove the log-capturing handler + LOGGER.removeHandler(logging_handler) + + +def make_xml( + plaintextfile, xmlfile, language, **kwargs +) -> Tuple[int, Optional[Exception], str]: + """Run the "readalongs make-xml" command from within a Python script. + + Args: + plaintextfile (str | Path): input plain text file + xmlfile (str | Path): output XML file + language (List[str]): list of languages for g2p and g2p cascade + + Run "readalongs make-xml -h" or consult + https://readalong-studio.readthedocs.io/en/latest/cli-ref.html#readalongs-make-xml + for the full list of arguments and their meaning. + + Returns: (status, exception, log_text) + """ + # plaintextfile is not a file object if passed from click + plaintextfile = ( + plaintextfile.name + if isinstance(plaintextfile, click.utils.LazyFile) + else plaintextfile + ) + logging_stream = io.StringIO() + logging_handler = logging.StreamHandler(logging_stream) + try: + # Capture the logs + LOGGER.addHandler(logging_handler) + + make_xml_args = {param.name: param.default for param in cli.make_xml.params} + try: + with open(plaintextfile, "r", encoding="utf-8-sig") as plaintextfile_handle: + make_xml_args.update( + plaintextfile=plaintextfile_handle, + xmlfile=xmlfile, + language=JoinerCallbackForClick(get_langs_deferred())( + value_groups=language + ), + **kwargs + ) + cli.make_xml.callback(**make_xml_args) # type: ignore + except OSError as e: + # e.g.: FileNotFoundError or PermissionError on open(plaintextfile) above + raise click.UsageError(str(e)) from e + + return (0, None, logging_stream.getvalue()) + except Exception as e: + return (1, e, logging_stream.getvalue()) + finally: + # Remove the log-capturing handler + LOGGER.removeHandler(logging_handler) + + +def prepare(*args, **kwargs): + """Deprecated, use make_xml instead""" + LOGGER.warning( + "readalongs.api.prepare() is deprecated. Please use make_xml() instead." + ) + return make_xml(*args, **kwargs) diff --git a/readalongs/app.py b/readalongs/app.py index 8c3f5604..87945ad0 100644 --- a/readalongs/app.py +++ b/readalongs/app.py @@ -15,4 +15,4 @@ Session(app) socketio = SocketIO(app, manage_session=False) -import readalongs.views # noqa: E402 +import readalongs.views # noqa: E402 F401 diff --git a/readalongs/audio_utils.py b/readalongs/audio_utils.py index da0160f2..7ba85f7c 100644 --- a/readalongs/audio_utils.py +++ b/readalongs/audio_utils.py @@ -4,7 +4,7 @@ in millisecond slices and lets us manipulate them as if they were simple lists. 
""" -from typing import List, Optional, Tuple, Union +from typing import Union from pydub import AudioSegment @@ -12,8 +12,7 @@ def join_section(audio: AudioSegment, audio_to_insert: AudioSegment, start: int): - """ Given two AudioSegments, insert the second into the first at start (ms) - """ + """Given two AudioSegments, insert the second into the first at start (ms)""" try: return audio[:start] + audio_to_insert + audio[start:] except IndexError: @@ -25,8 +24,7 @@ def join_section(audio: AudioSegment, audio_to_insert: AudioSegment, start: int) def remove_section(audio: AudioSegment, start: int, end: int) -> AudioSegment: - """ Given an AudioSement, remove the section between start (ms) and end (ms) - """ + """Given an AudioSement, remove the section between start (ms) and end (ms)""" try: return audio[:start] + audio[end:] except IndexError: @@ -38,7 +36,7 @@ def remove_section(audio: AudioSegment, start: int, end: int) -> AudioSegment: def mute_section(audio: AudioSegment, start: int, end: int) -> AudioSegment: - """ Given an AudioSegment, reduce the gain between a given interval by 120db. + """Given an AudioSegment, reduce the gain between a given interval by 120db. Effectively, make it silent. Args: @@ -62,7 +60,7 @@ def mute_section(audio: AudioSegment, start: int, end: int) -> AudioSegment: def extract_section( audio: AudioSegment, start: Union[None, int], end: Union[None, int] ) -> AudioSegment: - """ Given an AudioSegment, extract and keep only the [start, end) interval + """Given an AudioSegment, extract and keep only the [start, end) interval Args: audio (AudioSegment): audio segment to extract a section from @@ -89,7 +87,7 @@ def extract_section( def write_audio_to_file(audio: AudioSegment, path: str) -> None: - """ Write AudioSegment to file + """Write AudioSegment to file Args: audio (AudioSegment): audio segment to write @@ -105,7 +103,7 @@ def write_audio_to_file(audio: AudioSegment, path: str) -> None: def read_audio_from_file(path: str) -> AudioSegment: - """ Read in AudioSegment from file + """Read in AudioSegment from file Args: path (str): Path to audiofile diff --git a/readalongs/cli.py b/readalongs/cli.py index 7b95de36..4b9af328 100644 --- a/readalongs/cli.py +++ b/readalongs/cli.py @@ -4,8 +4,8 @@ CLI commands implemented in this file: - align : main command to align text and audio - - prepare : prepare XML input for align from plain text - - tokenize: tokenize the prepared file + - make-xml : make XML input for align from plain text + - tokenize: tokenize the XML file - g2p : apply g2p to the tokenized file - langs : list languages supported by align """ @@ -26,7 +26,12 @@ from readalongs.text.convert_xml import convert_xml from readalongs.text.tokenize_xml import tokenize_xml from readalongs.text.util import save_xml, write_xml -from readalongs.util import JoinerCallback, getLangs, getLangsDeferred +from readalongs.util import ( + JoinerCallbackForClick, + get_langs, + get_langs_deferred, + get_obsolete_callback_for_click, +) SUPPORTED_OUTPUT_FORMATS = { "eaf": "ELAN file", @@ -84,7 +89,7 @@ def cli(): although other output formats like subtitles or Praat TextGrids are available. You can use this command line tool in two ways. The "end-to-end" method with the - "align" command, or using a sequence of steps with "prepare", "tokenize", and "g2p" + "align" command, or using a sequence of steps with "make-xml", "tokenize", and "g2p" to get more control over the process. 
## End-to-End @@ -102,16 +107,16 @@ def cli(): Using ReadAlongs this way, you must use the following commands in sequence. \b - prepare + make-xml ======= If you have plain text and you want to mark up some of the XML, you can - use this command to "prepare" your plain text into the XML structure + use this command to turn your plain text into the XML structure used by readalongs. \b tokenize ======== - Use this command to tokenize the output of the previous "readalongs prepare" command. + Use this command to tokenize the output of the previous "readalongs make-xml" command. \b g2p @@ -127,7 +132,7 @@ def cli(): """ -@cli.command( # noqa: C901 +@cli.command( # type: ignore # noqa: C901 # some versions of flake8 need this here context_settings=CONTEXT_SETTINGS, short_help="Force align a text and a sound file." ) @click.argument("textfile", type=click.Path(exists=True, readable=True)) @@ -149,7 +154,7 @@ def cli(): "-o", "--output-formats", multiple=True, - callback=JoinerCallback(SUPPORTED_OUTPUT_FORMATS), + callback=JoinerCallbackForClick(SUPPORTED_OUTPUT_FORMATS, drop_case=True), help=( "Comma- or colon-separated list of additional output file formats to export to. " "The text is always exported as XML and alignments as SMIL, but " @@ -157,7 +162,6 @@ def cli(): + SUPPORTED_OUTPUT_FORMATS_DESC ), ) -@click.option("-d", "--debug", is_flag=True, help="Add debugging messages to logger") @click.option( "-f", "--force-overwrite", is_flag=True, help="Force overwrite output files" ) @@ -167,7 +171,11 @@ def cli(): hidden=True, is_flag=True, default=None, - help="OBSOLETE; the input format is now guessedb by extension or contents", + help="OBSOLETE; the input format is now guessed by extension or contents", + callback=get_obsolete_callback_for_click( + ".txt files are now read as plain text, .xml as XML, and other files based on\n" + "whether they start with List[dict]: - """ Give a list of DNA segments, sort them and join any overlapping ones """ + """Give a list of DNA segments, sort them and join any overlapping ones""" results: List[dict] = [] for seg in sorted(do_not_align_segments, key=lambda x: x["begin"]): if results and results[-1]["end"] >= seg["begin"]: @@ -24,9 +24,9 @@ def sort_and_join_dna_segments(do_not_align_segments: List[dict]) -> List[dict]: def correct_adjustments( start: int, end: int, do_not_align_segments: List[dict] ) -> Tuple[int, int]: - """ Given the start and end of a segment (in ms) and a list of do-not-align segments, - If one of the do-not-align segments occurs inside one of the start-end range, - align the start or end with the do-not-align segment, whichever requires minimal change + """Given the start and end of a segment (in ms) and a list of do-not-align segments, + If one of the do-not-align segments occurs inside one of the start-end range, + align the start or end with the do-not-align segment, whichever requires minimal change """ for seg in do_not_align_segments: if start < seg["begin"] and end > seg["end"]: @@ -38,7 +38,7 @@ def correct_adjustments( def calculate_adjustment(timestamp: int, do_not_align_segments: List[dict]) -> int: - """ Given a time (in ms) and a list of do-not-align segments, + """Given a time (in ms) and a list of do-not-align segments, return the sum (ms) of the lengths of the do-not-align segments that start before the timestamp @@ -58,7 +58,7 @@ def calculate_adjustment(timestamp: int, do_not_align_segments: List[dict]) -> i def segment_intersection(segments1: List[dict], segments2: List[dict]) -> List[dict]: - """ Return the 
intersection of two lists of segments + """Return the intersection of two lists of segments Precondition: segments1 and segments2 contain sorted, non-overlapping ranges @@ -89,9 +89,9 @@ def segment_intersection(segments1: List[dict], segments2: List[dict]) -> List[d def dna_union( - start, end, audio_length: int, do_not_align_segments: List[dict], + start, end, audio_length: int, do_not_align_segments: List[dict] ) -> List[dict]: - """ Return the DNA list to include [start,end] and exclude do_not_align_segments + """Return the DNA list to include [start,end] and exclude do_not_align_segments Given time range [start, end] to keep, and a list of do-not-align-segments to exclude, calculate the equivalent do-not-align-segment list to keeping only diff --git a/readalongs/epub/create_epub.py b/readalongs/epub/create_epub.py index 70607dba..4c21f236 100644 --- a/readalongs/epub/create_epub.py +++ b/readalongs/epub/create_epub.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python # -*- coding: utf-8 -*- ###################################################################### diff --git a/readalongs/lang/ckt/ckt_to_ipa.backup.json b/readalongs/lang/ckt/ckt_to_ipa.backup.json deleted file mode 100644 index 313a9e2d..00000000 --- a/readalongs/lang/ckt/ckt_to_ipa.backup.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "type": "mapping", - "authors": ["Vasilisa Andrianets", "Patrick Littell"], - "created": "2019-07-02", - "last_modified": "2019-07-02", - "in_metadata": { - "display_name": "Chukchi", - "display": true, - "lang": "ckt", - "format": "custom", - "delimiter": "", - "case_insensitive": true - }, - "out_metadata": { - "lang": "ckt-ipa", - "format": "ipa", - "delimiter": "" - }, - "map": [ - {"in":"ʼ", "out": "ʔ"}, - {"in":"а", "out": "a"}, - {"in":"в", "out": "w"}, - {"in":"г", "out": "ɣ"}, - {"in":"е", "out": "e"}, - {"in":"и", "out": "i"}, - {"in":"й", "out": "j"}, - {"in":"к", "out": "k"}, - {"in":"м", "out": "m"}, - {"in":"н", "out": "n"}, - {"in":"о", "out": "o"}, - {"in":"п", "out": "p"}, - {"in":"р", "out": "ɾ"}, - {"in":"с", "out": "s"}, - {"in":"ч", "out": "s"}, - {"in":"т", "out": "t"}, - {"in":"у", "out": "u"}, - {"in":"ъ", "out": "ʔ"}, - {"in":"ы", "out": "ə"}, - {"in":"ь", "out": "ʔ"}, - {"in":"э", "out": "e"}, - {"in":"ю", "out": "u"}, - {"in":"я", "out": "a"}, - {"in":"ё", "out": "o"}, - {"in":"ӄ", "out": "q"}, - {"in":"ӈ", "out": "ŋ"}, - {"in":"ԓ", "out": "ɬ"} - ] -} diff --git a/readalongs/lang/eng/eng_ipa_to_arpabet.backup.json b/readalongs/lang/eng/eng_ipa_to_arpabet.backup.json deleted file mode 100644 index 6086c7d9..00000000 --- a/readalongs/lang/eng/eng_ipa_to_arpabet.backup.json +++ /dev/null @@ -1,84 +0,0 @@ -{ - "type": "mapping", - "authors": ["Patrick Littell"], - "created": "2019-02-13", - "last_modified": "2019-02-15", - "in_metadata": { - "lang": "eng-ipa", - "format": "ipa", - "delimiter": "" - }, - "out_metadata": { - "lang": "eng-arpabet", - "format": "arpabet", - "delimiter": " " - }, - "map": [ - { "in": "ɑ", "out": "AA" }, - { "in": "ɑ̃", "out": "AA N" }, - { "in": "æ", "out": "AE" }, - { "in": "æ̃", "out": "AE N" }, - { "in": "ʌ", "out": "AH" }, - { "in": "ʌ̃", "out": "AH N" }, - { "in": "ɔ", "out": "AO" }, - { "in": "ɔ̃", "out": "AO N" }, - { "in": "aʊ", "out": "AW" }, - { "in": "ə", "out": "AH" }, - { "in": "aɪ", "out": "AY" }, - { "in": "ɛ", "out": "EH" }, - { "in": "ɛ̃", "out": "EH N" }, - { "in": "ɜ˞", "out": "ER" }, - { "in": "eɪ", "out": "EY" }, - { "in": "eː", "out": "EY" }, - { "in": "ej", "out": "EY" }, - { "in": "ẽ", "out": "EY N" }, - { 
"in": "ẽː", "out": "EY N" }, - { "in": "ɪ", "out": "IH" }, - { "in": "ɪ̃", "out": "IH N" }, - { "in": "ɨ", "out": "IX" }, - { "in": "i", "out": "IY" }, - { "in": "ĩ", "out": "IY N" }, - { "in": "oʊ", "out": "OW" }, - { "in": "ow", "out": "OW" }, - { "in": "oː", "out": "OW" }, - { "in": "õ", "out": "OW N" }, - { "in": "õː", "out": "OW N" }, - { "in": "ɔɪ", "out": "OY" }, - { "in": "ʊ", "out": "UH" }, - { "in": "ʊ̃", "out": "UH N" }, - { "in": "u", "out": "UW" }, - { "in": "ũ", "out": "UW N" }, - { "in": "b", "out": "B" }, - { "in": "tʃ", "out": "CH" }, - { "in": "t͡ʃ", "out": "CH" }, - { "in": "d", "out": "D" }, - { "in": "ð", "out": "DH" }, - { "in": "ɾ", "out": "D" }, - { "in": "l̩", "out": "EL" }, - { "in": "m̩", "out": "EM" }, - { "in": "n̩", "out": "EN" }, - { "in": "f", "out": "F" }, - { "in": "ɡ", "out": "G" }, - { "in": "h", "out": "HH" }, - { "in": "dʒ", "out": "JH" }, - { "in": "k", "out": "K" }, - { "in": "l", "out": "L" }, - { "in": "m", "out": "M" }, - { "in": "n", "out": "N" }, - { "in": "ŋ", "out": "NG" }, - { "in": "ɾ̃", "out": "NX" }, - { "in": "p", "out": "P" }, - { "in": "ʔ", "out": "HH" }, - { "in": "ɹ", "out": "R" }, - { "in": "s", "out": "S" }, - { "in": "ʃ", "out": "SH" }, - { "in": "t", "out": "T" }, - { "in": "θ", "out": "TH" }, - { "in": "v", "out": "V" }, - { "in": "w", "out": "W" }, - { "in": "ʍ", "out": "WH" }, - { "in": "j", "out": "Y" }, - { "in": "z", "out": "Z" }, - { "in": "ʒ", "out": "ZH" } - ] -} diff --git a/readalongs/log.py b/readalongs/log.py index 96e1a59a..b342c276 100644 --- a/readalongs/log.py +++ b/readalongs/log.py @@ -1,24 +1,16 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -####################################################################### -# -# log.py -# -# Setup a logger that has colours! -# -####################################################################### +""" +log.py: Setup a logger that has colours! +""" import logging import coloredlogs -FIELD_STYLES = dict(levelname=dict(color="green", bold=coloredlogs.CAN_USE_BOLD_FONT),) +FIELD_STYLES = dict(levelname=dict(color="green", bold=coloredlogs.CAN_USE_BOLD_FONT)) def setup_logger(name): - """ Create logger and configure with cool colors! - """ + """Create logger and configure with cool colors!""" logging.basicConfig(level=logging.INFO) logger = logging.getLogger(name) diff --git a/readalongs/portable_tempfile.py b/readalongs/portable_tempfile.py index e643e17e..45c26ec4 100644 --- a/readalongs/portable_tempfile.py +++ b/readalongs/portable_tempfile.py @@ -11,7 +11,7 @@ class _PortableNamedTemporaryFileWrapper: - """ Wrapper object around the real NamedTemporaryFile that forwards calls as needed + """Wrapper object around the real NamedTemporaryFile that forwards calls as needed The difference with NamedTemporaryFile is that we cleanup on exit and del, rather than on close. @@ -54,7 +54,7 @@ def cleanup(self): def PortableNamedTemporaryFile( mode="w+b", suffix="", prefix=template, dir=None, delete=True ): - """ Portable named temporary file that works on Windows, Linux and Mac. + """Portable named temporary file that works on Windows, Linux and Mac. This class wraps tempfile.NamedTemporaryFile() with a portable behaviour that works on Windows, Linux and Mac as we need it to. 
diff --git a/readalongs/run.py b/readalongs/run.py index f57a7885..12edbf5b 100644 --- a/readalongs/run.py +++ b/readalongs/run.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python # -*- coding: utf-8 -*- ##################################################################################### @@ -22,8 +22,7 @@ def run(): - """ Run app using SocketIO - """ + """Run app using SocketIO""" socketio.run(app) diff --git a/readalongs/static/model/cmusphinx-en-us-5.2/README b/readalongs/static/model/cmusphinx-en-us-5.2/README new file mode 100644 index 00000000..53ee8b32 --- /dev/null +++ b/readalongs/static/model/cmusphinx-en-us-5.2/README @@ -0,0 +1,34 @@ +/* ==================================================================== + * Copyright (c) 2015 Alpha Cephei Inc. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY ALPHA CEPHEI INC. ``AS IS'' AND. + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,. + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ALPHA CEPHEI INC. + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT. + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,. + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY. + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT. + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE. + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +This directory contains generic US english acoustic model trained with +latest sphinxtrain. 
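For context, here is a rough sketch of how an aligner can point SoundSwallower at a bundled acoustic model directory like the one added below. This is a hedged illustration, not code from the patch: the dict-style Config access and the "hmm" and "samprate" keys mirror what align.py does elsewhere in this diff, but constructing soundswallower.Config() directly and the 16000 sample rate are assumptions:

    # Illustrative sketch; not part of this patch.
    import os
    import soundswallower

    model_dir = os.path.join("readalongs", "static", "model", "cmusphinx-en-us-5.2")
    asr_config = soundswallower.Config()
    asr_config["hmm"] = model_dir   # acoustic model dir: means, variances, mdef, ...
    asr_config["samprate"] = 16000  # assumed; must match the audio fed to the decoder
    decoder = soundswallower.Decoder(asr_config)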
diff --git a/readalongs/static/model/cmusphinx-en-us-5.2/feat_params.json b/readalongs/static/model/cmusphinx-en-us-5.2/feat_params.json
new file mode 100644
index 00000000..85120078
--- /dev/null
+++ b/readalongs/static/model/cmusphinx-en-us-5.2/feat_params.json
@@ -0,0 +1,11 @@
+{
+    "lowerf": 130,
+    "upperf": 6800,
+    "nfilt": 25,
+    "transform": "dct",
+    "lifter": 22,
+    "feat": "1s_c_d_dd",
+    "cmn": "current",
+    "varnorm": false,
+    "cmninit": "40,3,-1"
+}
diff --git a/readalongs/static/model/cmusphinx-en-us-5.2/feature_transform b/readalongs/static/model/cmusphinx-en-us-5.2/feature_transform
new file mode 100644
index 00000000..78b4f937
Binary files /dev/null and b/readalongs/static/model/cmusphinx-en-us-5.2/feature_transform differ
diff --git a/readalongs/static/model/cmusphinx-en-us-5.2/mdef b/readalongs/static/model/cmusphinx-en-us-5.2/mdef
new file mode 100644
index 00000000..ed57c16d
Binary files /dev/null and b/readalongs/static/model/cmusphinx-en-us-5.2/mdef differ
diff --git a/readalongs/static/model/cmusphinx-en-us-5.2/mdef.ci b/readalongs/static/model/cmusphinx-en-us-5.2/mdef.ci
new file mode 100644
index 00000000..7bc2bc75
Binary files /dev/null and b/readalongs/static/model/cmusphinx-en-us-5.2/mdef.ci differ
diff --git a/readalongs/static/model/cmusphinx-en-us-5.2/means b/readalongs/static/model/cmusphinx-en-us-5.2/means
new file mode 100644
index 00000000..82a41221
Binary files /dev/null and b/readalongs/static/model/cmusphinx-en-us-5.2/means differ
diff --git a/readalongs/static/model/cmusphinx-en-us-5.2/mixture_weights b/readalongs/static/model/cmusphinx-en-us-5.2/mixture_weights
new file mode 100644
index 00000000..04a06a75
Binary files /dev/null and b/readalongs/static/model/cmusphinx-en-us-5.2/mixture_weights differ
diff --git a/readalongs/static/model/cmusphinx-en-us-5.2/noisedict.txt b/readalongs/static/model/cmusphinx-en-us-5.2/noisedict.txt
new file mode 100644
index 00000000..00e4c908
--- /dev/null
+++ b/readalongs/static/model/cmusphinx-en-us-5.2/noisedict.txt
@@ -0,0 +1,9 @@
+<s> SIL
+</s> SIL
+<sil> SIL
+[BREATH] +BREATH+
+[COUGH] +COUGH+
+[NOISE] +NOISE+
+[SMACK] +SMACK+
+[UH] +UH+
+[UM] +UM+
diff --git a/readalongs/static/model/cmusphinx-en-us-5.2/transition_matrices b/readalongs/static/model/cmusphinx-en-us-5.2/transition_matrices
new file mode 100644
index 00000000..806ff991
Binary files /dev/null and b/readalongs/static/model/cmusphinx-en-us-5.2/transition_matrices differ
diff --git a/readalongs/static/model/cmusphinx-en-us-5.2/variances b/readalongs/static/model/cmusphinx-en-us-5.2/variances
new file mode 100644
index 00000000..4c6ffbb2
Binary files /dev/null and b/readalongs/static/model/cmusphinx-en-us-5.2/variances differ
diff --git a/readalongs/templates/base.html b/readalongs/templates/base.html
index e22c84f6..123fee22 100644
--- a/readalongs/templates/base.html
+++ b/readalongs/templates/base.html
@@ -33,8 +33,8 @@
 
-
-
+
+
 
diff --git a/readalongs/templates/export.html b/readalongs/templates/export.html
index ce538f57..8dc96d30 100644
--- a/readalongs/templates/export.html
+++ b/readalongs/templates/export.html
@@ -10,8 +10,8 @@

Code

Here's a snippet of code to embed in your site!

- <script type="module" src='https://unpkg.com/@roedoejet/readalong/dist/read-along/read-along.esm.js'></script> - <script nomodule src='https://unpkg.com/@roedoejet/readalong/dist/read-along/read-along.js'></script> + <script type="module" src='https://unpkg.com/@roedoejet/readalong@^0.1.6/dist/read-along/read-along.esm.js'></script> + <script nomodule src='https://unpkg.com/@roedoejet/readalong@^0.1.6/dist/read-along/read-along.js'></script> <read-along text="aligned.xml" alignment="aligned.smil" audio="aligned{{data.audio_ext}}"></read-along>
@@ -31,6 +31,11 @@

ReadAlong

Log

Here's a log for debugging.

{{data.log}}

+ {% if 'log_lines' in data %} + {% for line in data.log_lines %} +

{{line}}

+ {% endfor %} + {% endif %} {% endif %} diff --git a/readalongs/text/add_elements_to_xml.py b/readalongs/text/add_elements_to_xml.py index 2a0cc10f..589345f1 100644 --- a/readalongs/text/add_elements_to_xml.py +++ b/readalongs/text/add_elements_to_xml.py @@ -1,6 +1,3 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - ################################################### # # add_elements_to_xml.py diff --git a/readalongs/text/add_ids_to_xml.py b/readalongs/text/add_ids_to_xml.py index a2a81de9..ce0148f2 100644 --- a/readalongs/text/add_ids_to_xml.py +++ b/readalongs/text/add_ids_to_xml.py @@ -1,6 +1,3 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - ################################################### # # add_ids_to_xml.py @@ -16,15 +13,12 @@ # ################################################### -from __future__ import absolute_import, division, print_function, unicode_literals - -import argparse from collections import defaultdict from copy import deepcopy from lxml import etree -from readalongs.text.util import is_do_not_align, load_xml, save_xml +from readalongs.text.util import is_do_not_align TAG_TO_ID = { "text": "t", @@ -42,7 +36,7 @@ def add_ids_aux(element: etree, ids: defaultdict, parent_id: str = "") -> defaultdict: - """ Add ids to xml element + """Add ids to xml element Args: element (etree): Element to add ids to @@ -119,19 +113,3 @@ def add_ids(xml: etree) -> etree: continue ids = add_ids_aux(child, ids) return xml - - -def go(input_filename: str, output_filename: str) -> None: - xml = load_xml(input_filename) - xml = add_ids(xml) - save_xml(output_filename, xml) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Convert XML to another orthography while preserving tags" - ) - parser.add_argument("input", type=str, help="Input XML") - parser.add_argument("output", type=str, help="Output XML") - args = parser.parse_args() - go(args.input, args.output) diff --git a/readalongs/text/convert_xml.py b/readalongs/text/convert_xml.py index 0672e353..c34c4179 100644 --- a/readalongs/text/convert_xml.py +++ b/readalongs/text/convert_xml.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python3 - ########################################################################### # # convert_xml.py @@ -35,7 +33,6 @@ # TODO: Document functions ############################################################################ -import argparse import copy import os import re @@ -43,22 +40,39 @@ from readalongs.log import LOGGER from readalongs.text.lexicon_g2p import getLexiconG2P from readalongs.text.lexicon_g2p_mappings import __file__ as LEXICON_PATH -from readalongs.text.util import ( - get_attrib_recursive, - get_lang_attrib, - load_xml, - save_xml, -) -from readalongs.util import getLangs +from readalongs.text.util import get_attrib_recursive, get_word_text, iterate_over_text + + +def get_same_language_units(element): + """Find all the text in element, grouped by units of the same language + + Returns: list of (lang, text) pairs + """ + same_language_units = [] + current_sublang, current_subword = None, None + for sublang, subword in iterate_over_text(element): + sublang = sublang.strip() if sublang else "" + if current_subword and sublang == current_sublang: + current_subword += subword + else: + if current_subword: + same_language_units.append((current_sublang, current_subword)) + current_sublang, current_subword = sublang, subword + if current_subword: + same_language_units.append((current_sublang, current_subword)) + return same_language_units def convert_words( # noqa: 
C901 - xml, word_unit="w", output_orthography="eng-arpabet", verbose_warnings=False, + xml, word_unit="w", output_orthography="eng-arpabet", verbose_warnings=False ): """Helper for convert_xml(), with the same Args and Return values, except xml is modified in place returned itself, instead of making a copy. """ + if output_orthography != "eng-arpabet": + LOGGER.info(f"output_orthography={output_orthography}") + # Defer expensive import of g2p to do them only if and when they are needed from g2p.mappings.langs.utils import is_arpabet @@ -94,7 +108,10 @@ def convert_word(word: str, lang: str): # Note: adding eng_ prefix to vars that are used in both blocks to make mypy # happy. Since the two sides of the if and in the same scope, it complains about # type checking otherwise. - assert output_orthography == "eng-arpabet" + if "eng-arpabet" not in output_orthography: + raise ValueError( + f'Cannot g2p "eng" to output orthography "{output_orthography}".' + ) eng_converter = getLexiconG2P( os.path.join(os.path.dirname(LEXICON_PATH), "cmu_sphinx.metadata.json") ) @@ -112,16 +129,16 @@ def convert_word(word: str, lang: str): converter = make_g2p(lang, output_orthography) except InvalidLanguageCode as e: raise ValueError( - f'Could not g2p "{word}" as "{lang}": invalid language code. ' - f"Use one of {getLangs()[0]}" + f'Could not g2p "{word}" from "{lang}" to "{output_orthography}": {e} ' + f'\nRun "readalongs langs" to list languages supported by ReadAlongs Studio.' ) from e except NoPath as e: raise ValueError( - f'Count not g2p "{word}" as "{lang}": no path to "{output_orthography}". ' - f"Use one of {getLangs()[0]}" + f'Could not g2p "{word}": no path from "{lang}" to "{output_orthography}".' + f'\nRun "readalongs langs" to list languages supported by ReadAlongs Studio.' ) from e tg = converter(word) - text = tg.output_string.strip() + text = tg.output_string valid = converter.check(tg, shallow=True) if not valid and verbose_warnings: converter.check(tg, shallow=False, display_warnings=verbose_warnings) @@ -134,55 +151,66 @@ def convert_word(word: str, lang: str): arpabet = word.attrib["ARPABET"] if not is_arpabet(arpabet): LOGGER.warning( - f'Pre-g2p\'d text "{word.text}" has invalid ARPABET conversion "{arpabet}"' + f'Pre-g2p\'d text "{get_word_text(word)}" has invalid ARPABET conversion "{arpabet}"' ) all_g2p_valid = False continue # only convert text within words - if not word.text: + same_language_units = get_same_language_units(word) + if not same_language_units: continue - g2p_lang = get_lang_attrib(word) or "und" # default: Undetermined - g2p_fallbacks = get_attrib_recursive(word, "fallback-langs") - text_to_g2p = word.text - try: - g2p_text, valid = convert_word(text_to_g2p, g2p_lang.strip()) - if not valid: - # This is where we apply the g2p cascade - for lang in re.split(r"[,:]", g2p_fallbacks) if g2p_fallbacks else []: - LOGGER.warning( - f'Could not g2p "{text_to_g2p}" as {g2p_lang}. ' - f"Trying fallback: {lang}." - ) - g2p_lang = lang.strip() - g2p_text, valid = convert_word(text_to_g2p, g2p_lang) - if valid: - word.attrib["effective-g2p-lang"] = g2p_lang - break - else: - all_g2p_valid = False - LOGGER.warning( - f'No valid g2p conversion found for "{text_to_g2p}". ' - f"Check its orthography and language code, " - f"or pick suitable g2p fallback languages." - ) - - # Save the g2p_text from the last conversion attemps, even when - # it's not valid, so it's in the g2p output if the user wants to - # inspect it manually. 
- word.attrib["ARPABET"] = g2p_text - - except ValueError as e: - LOGGER.warning( - f'Could not g2p "{text_to_g2p}" due to an incorrect ' - f'"xml:lang", "lang" or "fallback-langs" attribute in the XML: {e}' - ) - all_g2p_valid = False + all_arpabet = "" + for lang, text in same_language_units: + g2p_lang = lang or "und" # default: Undetermined + g2p_fallbacks = get_attrib_recursive(word, "fallback-langs") + text_to_g2p = text.strip() + try: + g2p_text, valid = convert_word(text_to_g2p, g2p_lang) + if not valid: + # This is where we apply the g2p cascade + for lang in ( + re.split(r"[,:]", g2p_fallbacks) if g2p_fallbacks else [] + ): + LOGGER.warning( + f'Could not g2p "{text_to_g2p}" as {g2p_lang}. ' + f"Trying fallback: {lang}." + ) + g2p_lang = lang.strip() + g2p_text, valid = convert_word(text_to_g2p, g2p_lang) + if valid: + word.attrib["effective-g2p-lang"] = g2p_lang + break + else: + all_g2p_valid = False + LOGGER.warning( + f'No valid g2p conversion found for "{text_to_g2p}". ' + f"Check its orthography and language code, " + f"or pick suitable g2p fallback languages." + ) + + # Save the g2p_text from the last conversion attemps, even when + # it's not valid, so it's in the g2p output if the user wants to + # inspect it manually. + + all_arpabet = all_arpabet + " " + g2p_text.strip() + + except ValueError as e: + LOGGER.warning( + f'Could not g2p "{text_to_g2p}" due to an incorrect ' + f'"xml:lang", "lang" or "fallback-langs" attribute in the XML: {e}' + ) + all_g2p_valid = False + + if not verbose_warnings: + break + + word.attrib["ARPABET"] = all_arpabet.strip() return xml, all_g2p_valid def convert_xml( - xml, word_unit="w", output_orthography="eng-arpabet", verbose_warnings=False, + xml, word_unit="w", output_orthography="eng-arpabet", verbose_warnings=False ): """Convert all the words in XML though g2p, putting the results in attribute ARPABET @@ -203,33 +231,3 @@ def convert_xml( xml_copy, word_unit, output_orthography, verbose_warnings ) return xml_copy, valid - - -def go( - input_filename, output_filename, word_unit="w", output_orthography="eng-arpabet" -): - xml = load_xml(input_filename) - converted_xml = convert_xml(xml, word_unit, output_orthography) - save_xml(output_filename, converted_xml) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Convert XML to another orthography while preserving tags" - ) - parser.add_argument("input", type=str, help="Input XML") - parser.add_argument("output", type=str, help="Output XML") - parser.add_argument( - "--word_unit", - type=str, - default="w", - help="XML element that " 'represents a word (default: "w")', - ) - parser.add_argument( - "--out_orth", - type=str, - default="eng-arpabet", - help='Output orthography (default: "eng-arpabet")', - ) - args = parser.parse_args() - go(args.input, args.output, args.word_unit, args.out_orth) diff --git a/readalongs/text/end_to_end.py b/readalongs/text/end_to_end.py index b3d3d2bd..5229eef2 100644 --- a/readalongs/text/end_to_end.py +++ b/readalongs/text/end_to_end.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python # -*- coding: utf-8 -*- ######################################################################### diff --git a/readalongs/text/lexicon_g2p.py b/readalongs/text/lexicon_g2p.py index b8fdf6b0..caeb0072 100644 --- a/readalongs/text/lexicon_g2p.py +++ b/readalongs/text/lexicon_g2p.py @@ -1,7 +1,3 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - - ####################################################################### # # 
lexicon_g2p.py @@ -25,8 +21,6 @@ # TODO: Move this to the G2P library ###################################################################### -from __future__ import division, print_function, unicode_literals - import os from collections import defaultdict from unicodedata import normalize diff --git a/readalongs/text/make_dict.py b/readalongs/text/make_dict.py index a4df04d9..f2eff571 100644 --- a/readalongs/text/make_dict.py +++ b/readalongs/text/make_dict.py @@ -1,6 +1,3 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - ################################################## # # make_dict.py @@ -12,15 +9,9 @@ # ################################################## - -from __future__ import absolute_import, division, print_function, unicode_literals - -import argparse - import chevron from readalongs.log import LOGGER -from readalongs.text.util import load_xml, save_txt DICT_TEMPLATE = """{{#items}} {{id}}\t{{pronunciation}} @@ -28,8 +19,7 @@ """ -def make_dict(word_elements, input_filename, unit="m"): - data = {"items": []} +def generate_dict_entries(word_elements, input_filename, unit): nwords = 0 for e in word_elements: if "id" not in e.attrib: @@ -40,29 +30,25 @@ def make_dict(word_elements, input_filename, unit="m"): if not text: continue nwords += 1 - data["items"].append({"id": e.attrib["id"], "pronunciation": text}) + yield e.attrib["id"], text if nwords == 0: raise RuntimeError("No words in dictionary!") - return chevron.render(DICT_TEMPLATE, data) -def go(input_filename, output_filename, unit): - xml = load_xml(input_filename) - dct = make_dict(xml.xpath(".//" + unit), input_filename, unit) - save_txt(output_filename, dct) +def make_dict_object(word_elements, input_filename="'in-memory'", unit="m"): + return { + word_id: text + for word_id, text in generate_dict_entries(word_elements, input_filename, unit) + } -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Make a pronunciation dictionary from a G2P'd XML file" - ) - parser.add_argument("input", type=str, help="Input XML") - parser.add_argument("output", type=str, help="Output .dict file") - parser.add_argument( - "--unit", - type=str, - default="m", - help="XML tag of the unit of analysis " '(e.g. "w" for word, "m" for morpheme)', - ) - args = parser.parse_args() - go(args.input, args.output, args.unit) +def make_dict(word_elements, input_filename="'in-memory'", unit="m"): + data = { + "items": [ + {"id": word_id, "pronunciation": text} + for word_id, text in generate_dict_entries( + word_elements, input_filename, unit + ) + ] + } + return chevron.render(DICT_TEMPLATE, data) diff --git a/readalongs/text/make_fsg.py b/readalongs/text/make_fsg.py index e629c3e9..6a1ad4d6 100644 --- a/readalongs/text/make_fsg.py +++ b/readalongs/text/make_fsg.py @@ -1,28 +1,19 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -################################################## -# -# make_fsg.py -# -# This module takes a text file, marked up with -# units (e.g. w for word, m for morpheme) and ids -# and converted to IPA, and outputs a FSG -# file for processing by PocketSphinx. -# -################################################## +""" +make_fsg.py generates an FSG or a JSGF for a marked-up text file. +This module takes a text file, marked up with units (e.g.
w for word, m for +morpheme) and ids and converted to IPA, and outputs an FSG or a JSGF +file for processing by PocketSphinx, SoundSwallower or SoundSwallower.js +""" -from __future__ import absolute_import, division, print_function, unicode_literals -import argparse +import datetime import os import chevron from slugify import slugify from readalongs.log import LOGGER -from readalongs.text.util import load_xml, save_txt FSG_TEMPLATE = """FSG_BEGIN {{name}} NUM_STATES {{num_states}} @@ -36,55 +27,78 @@ """ -def make_fsg(word_elements, filename): - name = slugify(os.path.splitext(os.path.basename(filename))[0]) - data = { - "name": name, # If name includes special characters, pocketsphinx throws a RuntimeError: new_Decoder returned -1 - "states": [], - "num_states": 0, - } +def get_ids(word_elements: list): + """Extract the sequence of ids from word_elements with both an id and + an ARPABET pronunciation. + + Words with empty ARPABET are skipped because soundswallower and + pocketsphinx will error out if we give them words with an empty pronunciation + key. In general, what *would* it mean to align sounds to an empty sequence + of phonemes, after all? + + Yields: + text_ids + """ for e in word_elements: if "id" not in e.attrib: # don't put in elements with no id continue - if not e.text or not e.text.strip(): - LOGGER.warning("No text in node %s", e.attrib["id"]) + if not e.attrib.get("ARPABET", "").strip(): + LOGGER.warning("Skipping node %s with no ARPABET", e.attrib["id"]) continue - text = e.text.strip() - # if not e.text.strip(): # don't put in elements with no text - # continue - data["states"].append( - { - "id": e.attrib["id"] if text else "", - "current": data["num_states"], - "next": data["num_states"] + 1, - } - ) - data["num_states"] += 1 - - data["final_state"] = data["num_states"] - data["num_states"] += 1 + yield e.attrib["id"] + + +def make_fsg(word_elements: list, filename: str = "'in-memory'") -> str: + """Generate an FSG for the given word elements + + Returns: the text contents of the FSG file for processing by PocketSphinx + """ + + states = [ + {"id": text_id, "current": i, "next": i + 1} + for i, text_id in enumerate(get_ids(word_elements)) + ] + + data = { + # If name includes special characters, pocketsphinx throws a RuntimeError: + # new_Decoder returned -1, so pass it through slugify() first + "name": slugify(os.path.splitext(os.path.basename(filename))[0]), + "states": states, + "final_state": len(states), + "num_states": len(states) + 1, + } return chevron.render(FSG_TEMPLATE, data) -def go(input_filename, output_filename, unit): - xml = load_xml(input_filename) - fsg = make_fsg(xml.xpath(".//" + unit), input_filename, unit) - save_txt(output_filename, fsg) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Make an FSG grammar from an XML file with IDs" - ) - parser.add_argument("input", type=str, help="Input XML") - parser.add_argument("output_fsg", type=str, help="Output .fsg file") - parser.add_argument( - "--unit", - type=str, - default="m", - help="XML tag of the unit of analysis " '(e.g. "w" for word, "m" for morpheme)', - ) - args = parser.parse_args() - go(args.input, args.output_fsg, args.unit) +JSGF_TEMPLATE = """#JSGF 1.0 UTF-8; +grammar {{name}}; + +/** + * Auto-generated JSGF grammar for the document {{name}}.
+ * + * @author Automatically generated by make_jsgf + * @version 1.0 + * @since {{date}} + */ + +public <{{name}}> = {{#words}} {{id}} {{/words}} ; +""" + + +def make_jsgf(word_elements: list, filename: str = "'in-memory'") -> str: + """Generate a JSGF for the given word elements + + JSGF = Java Speech Grammar Format + + Returns: + the text contents of the JSGF file for processing by SoundSwallower.js + """ + data = { + "name": os.path.splitext(os.path.basename(filename))[0], + "date": datetime.datetime.today().strftime("%Y-%m-%d"), + "words": [{"id": text_id} for text_id in get_ids(word_elements)], + } + + return chevron.render(JSGF_TEMPLATE, data) diff --git a/readalongs/text/make_jsgf.py b/readalongs/text/make_jsgf.py deleted file mode 100644 index 538cffab..00000000 --- a/readalongs/text/make_jsgf.py +++ /dev/null @@ -1,81 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -################################################## -# -# make_fsg.py -# -# This module takes a text file, marked up with -# units (e.g. w for word, m for morpheme) and ids -# and converted to IPA, and outputs a FSG -# file for processing by PocketSphinx. -# -# TODO: AP: This docstring seems to have been copied from make_fsg -# and doesn't appear to be used. Do we need this file? -################################################## - - -from __future__ import absolute_import, division, print_function, unicode_literals - -import argparse -import datetime -import os - -import chevron - -from readalongs.text.util import load_xml, save_txt - -JSGF_TEMPLATE = """#JSGF 1.0 UTF-8; -grammar {{name}}; - -/** - * Auto-generated JSGF grammar for the document {{name}}. - * - * @author Automatically generated by make_jsgf.py - * @version 1.0 - * @since {{date}} - */ - -public <{{name}}> = {{#words}} {{id}} {{/words}} ; -""" - - -def make_jsgf(xml, filename, unit="m"): - data = { - "name": os.path.splitext(os.path.basename(filename))[0], - "date": datetime.datetime.today().strftime("%Y-%m-%d"), - "words": [], - } - - for e in xml.xpath(".//" + unit): - if "id" not in e.attrib: # don't put in elements with no id - continue - text = e.text.strip() - if text == "": # don't put in elements with no text - continue - id = e.attrib["id"] - data["words"].append({"id": id}) - - return chevron.render(JSGF_TEMPLATE, data) - - -def go(input_filename, output_filename, unit): - xml = load_xml(input_filename) - jsgf = make_jsgf(xml, input_filename, unit) - save_txt(output_filename, jsgf) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Make an JSGF grammar from an XML file with IDs" - ) - parser.add_argument("input", type=str, help="Input XML") - parser.add_argument("output_jsgf", type=str, help="Output .jsgf file") - parser.add_argument( - "--unit", - type=str, - default="m", - help="XML tag of the unit of analysis " '(e.g.
"w" for word, "m" for morpheme)', - ) - args = parser.parse_args() - go(args.input, args.output_fsg, args.unit) diff --git a/readalongs/text/make_package.py b/readalongs/text/make_package.py index cb6b7be8..75691eea 100644 --- a/readalongs/text/make_package.py +++ b/readalongs/text/make_package.py @@ -1,6 +1,3 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - ################################################### # # make_package.py @@ -24,8 +21,8 @@ from readalongs.log import LOGGER -JS_BUNDLE_URL = "https://unpkg.com/@roedoejet/readalong/dist/bundle.js" -FONTS_BUNDLE_URL = "https://unpkg.com/@roedoejet/readalong/dist/fonts.b64.css" +JS_BUNDLE_URL = "https://unpkg.com/@roedoejet/readalong@^0.1.6/dist/bundle.js" +FONTS_BUNDLE_URL = "https://unpkg.com/@roedoejet/readalong@^0.1.6/dist/fonts.b64.css" BASIC_HTML = """ @@ -60,28 +57,34 @@ def encode_from_path(path: str) -> str: with open(path, "rb") as f: path_bytes = f.read() - if path.endswith("xml"): + if str(path).endswith("xml"): root = etree.fromstring(path_bytes) for img in root.xpath("//graphic"): url = img.get("url") - res = requests.get(url) if url.startswith("http") else None + if url.startswith("http"): + try: + request_result = requests.get(url) + except requests.exceptions.RequestException: + request_result = None + else: + request_result = None mime = guess_type(url) if os.path.exists(url): with open(url, "rb") as f: img_bytes = f.read() img_b64 = str(b64encode(img_bytes), encoding="utf8") - elif res and res.status_code == 200: - img_b64 = str(b64encode(res.content), encoding="utf8") + elif request_result and request_result.status_code == 200: + img_b64 = str(b64encode(request_result.content), encoding="utf8") else: - LOGGER.warn( - f"The image declared at {url} could not be found. Please check that it exists." + LOGGER.warning( + f"The image declared at {url} could not be found. Please check that it exists or that the URL is valid." ) continue img.attrib["url"] = f"data:{mime[0]};base64,{img_b64}" path_bytes = etree.tostring(root) b64 = str(b64encode(path_bytes), encoding="utf8") mime = guess_type(path) - if path.endswith( + if str(path).endswith( ".m4a" ): # hack to get around guess_type choosing the wrong mime type for .m4a files # TODO: Check other popular audio formats, .wav, .mp3, .ogg, etc... @@ -92,7 +95,7 @@ def encode_from_path(path: str) -> str: ) # Hack: until we properly extract audio from video files, force any video-based mime type to be read as audio else: mime_type = "application" - LOGGER.warn( + LOGGER.warning( f"We could not guess the mime type of file at {path}, we will try the generic mime type 'application', but this might not work with some files" ) return f"data:{mime_type};base64,{b64}" @@ -112,7 +115,7 @@ def create_web_component_html( js = requests.get(JS_BUNDLE_URL) fonts = requests.get(FONTS_BUNDLE_URL) if js.status_code != 200: - LOGGER.warn( + LOGGER.warning( f"Sorry, the JavaScript bundle that is supposed to be at {JS_BUNDLE_URL} returned a {js.status_code}. Your ReadAlong will be bundled using a version that may not be up-to-date. Please check your internet connection." ) with open( @@ -122,7 +125,7 @@ def create_web_component_html( else: js_raw = js.text if fonts.status_code != 200: - LOGGER.warn( + LOGGER.warning( f"Sorry, the fonts bundle that is supposed to be at {FONTS_BUNDLE_URL} returned a {fonts.status_code}. Your ReadAlong will be bundled using a version that may not be up-to-date. Please check your internet connection." 
) with open( diff --git a/readalongs/text/make_smil.py b/readalongs/text/make_smil.py index d3d91d7d..a766ad38 100644 --- a/readalongs/text/make_smil.py +++ b/readalongs/text/make_smil.py @@ -1,21 +1,16 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -################################################################### -# -# make_smil.py -# -# Turns alignment into formatted SMIL for ReadAlongs WebComponent -#################################################################### +""" +make_smil.py +Turns alignment into formatted SMIL for ReadAlongs WebComponent +""" -import argparse +from typing import List import chevron +from lxml import etree -from readalongs.text.util import save_txt - -SMIL_TEMPLATE = """ +SMIL_TEMPLATE = """\ + + {{#words}} @@ -35,59 +30,72 @@ END_SUBIDX = 3 -def parse_hypseg(text): - """Parse hypseg alignments file and return alignements - - Args: - text(str): hypseg text - - Returns: - dict: a dictionary of all start and end points for each word in text - """ - results = {"words": []} - tokens = text.strip().split() - # results["basename"] = tokens[BASENAME_IDX] - start = float(tokens[START_TIME_IDX]) * 0.01 - i = WORDS_IDX - while i < len(tokens): - word = tokens[i + WORD_SUBIDX] - end = tokens[i + END_SUBIDX] - end = float(end) * 0.01 - if word != "": - results["words"].append({"id": word, "start": start, "end": end}) - start = end - i += WORD_SPAN - return results - - -def make_smil(text_path: str, audio_path: str, results: dict) -> str: +def make_smil(text_path: str, audio_path: str, words: List[dict]) -> str: """Actually render the SMIL + words is a list of dicts with these elements: + { + "id": word id (str), + "start": word start time in seconds (float), + "end": word end time in seconds (float), + } + Args: - text_path(str): path to text - audio_path(str): path to audio - results(dict): all alignements + text_path (str): path to text + audio_path (str): path to audio + words (List[dict]): all alignments Returns: str: formatted SMIL """ - results["text_path"] = text_path - results["audio_path"] = audio_path - return chevron.render(SMIL_TEMPLATE, results) + return chevron.render( + SMIL_TEMPLATE, + {"text_path": text_path, "audio_path": audio_path, "words": words}, + ) + +def parse_smil(formatted_smil: str) -> List[dict]: + """Extract the list of words and their alignment from a SMIL file's content. -def go(seg_path, text_path, audio_path, output_path): - results = make_smil(text_path, audio_path, parse_hypseg(seg_path)) - save_txt(output_path, results) + Args: + formatted_smil (str): the raw, unparsed XML content of the .smil file + Returns: + List[dict]: a list of dicts with these elements: + { + "id": word id (str), + "start": word start time in seconds (float), + "end": word end time in seconds (float), + } + Raises: + ValueError if there is a problem parsing formatted_smil as valid SMIL + """ -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Convert XML to another orthography while preserving tags" - ) - parser.add_argument("input_seg", type=str, help="Input hypseg file") - parser.add_argument("text_path", type=str, help="Text filename") - parser.add_argument("audio_path", type=str, help="Audio filename") - parser.add_argument("output", type=str, help="Output SMIL file") - args = parser.parse_args() - go(args.input_seg, args.text_path, args.audio_path, args.output) + please_msg = "Please make sure your SMIL file is valid."
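+ # For reference, a minimal sketch of the SMIL shape this parser expects,
+ # using the namespace and attributes read below; the id, file names and
+ # clip times are illustrative only:
+ #
+ # <smil xmlns="http://www.w3.org/ns/SMIL">
+ #   <body>
+ #     <par id="par-w1">
+ #       <text src="sample.xml#w1"/>
+ #       <audio src="sample.mp3" clipBegin="0.14" clipEnd="0.64"/>
+ #     </par>
+ #   </body>
+ # </smil>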
+ + try: + xml = etree.fromstring(formatted_smil) + except etree.ParseError as e: + raise ValueError(f"Invalid SMIL file: {e}. {please_msg}") from e + ns = {"smil": "http://www.w3.org/ns/SMIL"} + + words = [] + for par_el in xml.xpath(".//smil:par", namespaces=ns): + text_src = par_el.find("smil:text", namespaces=ns).attrib["src"] + _, _, text_id = text_src.partition("#") + if not text_id: + raise ValueError(f"Missing word id. {please_msg}") + audio_el = par_el.find("smil:audio", namespaces=ns) + try: + clip_begin = float(audio_el.attrib["clipBegin"]) + clip_end = float(audio_el.attrib["clipEnd"]) + except KeyError as e: + raise ValueError(f"Missing 'clipBegin' or 'clipEnd'. {please_msg}") from e + except ValueError as e: + raise ValueError( + f"Invalid 'clipBegin' or 'clipEnd': {e}. {please_msg}" + ) from e + + words.append({"id": text_id, "start": clip_begin, "end": clip_end}) + + return words diff --git a/readalongs/text/tokenize_xml.py b/readalongs/text/tokenize_xml.py index a3e06d4d..b53a9227 100644 --- a/readalongs/text/tokenize_xml.py +++ b/readalongs/text/tokenize_xml.py @@ -1,6 +1,3 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - ################################################## # # tokenize_xml.py @@ -30,21 +27,12 @@ ################################################## -from __future__ import absolute_import, division, print_function, unicode_literals - -import argparse from copy import deepcopy from lxml import etree from readalongs.log import LOGGER -from readalongs.text.util import ( - get_lang_attrib, - is_do_not_align, - load_xml, - save_xml, - unicode_normalize_xml, -) +from readalongs.text.util import get_lang_attrib, is_do_not_align, unicode_normalize_xml def tokenize_xml_in_place(xml): @@ -57,10 +45,16 @@ etree: tokenized xml """ - from g2p.mappings.tokenizer import get_tokenizer # Defer expensive import + # Defer expensive import, and use the new version, but keep it + # compatible with older versions of g2p for at least a little while. + try: + from g2p import make_tokenizer + except ImportError: + from g2p import get_tokenizer as make_tokenizer def add_word_children(element): """Recursive helper for tokenize_xml_in_place()""" + tag = etree.QName(element.tag).localname nsmap = element.nsmap if hasattr(element, "nsmap") else element.getroot().nsmap if tag in ["w", "teiHeader", "head"]: # don't do anything to existing words!
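The try/except import above is the whole compatibility story: newer g2p releases expose make_tokenizer, while older ones called it get_tokenizer. A minimal sketch of how the deferred tokenizer then gets used, assuming g2p's tokenizer API; the language code and sample sentence are illustrative only:

    try:
        from g2p import make_tokenizer
    except ImportError:  # older g2p releases used the name get_tokenizer
        from g2p import get_tokenizer as make_tokenizer

    tokenizer = make_tokenizer("fra")  # illustrative language code
    for unit in tokenizer.tokenize_text("bonjour le monde"):
        # each unit is a dict flagging whether it is a word or inter-word text
        print(unit["text"], unit["is_word"])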
@@ -78,7 +72,7 @@ def add_word_children(element): new_element.attrib[key] = value lang = get_lang_attrib(element) - tokenizer = get_tokenizer(lang) + tokenizer = make_tokenizer(lang) if element.text: new_element.text = "" for unit in tokenizer.tokenize_text(element.text): @@ -129,19 +123,3 @@ def tokenize_xml(xml): return xml LOGGER.info("Words (<w>) not present; tokenizing") return tokenize_xml_in_place(xml) - - -def go(input_filename, output_filename): - xml = load_xml(input_filename) - xml = tokenize_xml(xml) - save_xml(output_filename, xml) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Convert XML to another orthography while preserving tags" - ) - parser.add_argument("input", type=str, help="Input XML") - parser.add_argument("output", type=str, help="Output XML") - args = parser.parse_args() - go(args.input, args.output) diff --git a/readalongs/text/util.py b/readalongs/text/util.py index f95f44dd..159699a8 100644 --- a/readalongs/text/util.py +++ b/readalongs/text/util.py @@ -1,6 +1,3 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - ########################################### # # util.py @@ -69,7 +66,27 @@ def get_attrib_recursive(element, *attribs): return None -def get_lang_attrib(element): +def iterate_over_text(element: etree.ElementTree): + """Iterate over all actual text contained within element and its sub-elements + + Yields: + (language_code, text) pairs + """ + lang = get_lang_attrib(element) + if element.text: + yield (lang, element.text) + for child in element: + yield from iterate_over_text(child) + if child.tail: + yield (lang, child.tail) + + +def get_word_text(word_element: etree.ElementTree) -> str: + """Given a word element, extract all its text""" + return "".join(text for _, text in iterate_over_text(word_element)) + + +def get_lang_attrib(element: etree.ElementTree): """Return the xml:lang (in priority) or lang (fallback) attribute from element or its closest ancestor that has either, or None when neither is found.
""" @@ -93,18 +110,18 @@ def load_xml_zip(zip_path, input_path): def load_xml_with_encoding(input_path): - """ etree.fromstring messes up on declared encodings """ + """etree.fromstring messes up on declared encodings""" return etree.parse(input_path) def write_xml(output_filelike, xml): - """ Write XML to already opened file-like object """ + """Write XML to already opened file-like object""" output_filelike.write(etree.tostring(xml, encoding="utf-8", xml_declaration=True)) output_filelike.write("\n".encode("utf-8")) def save_xml(output_path, xml): - """ Save XML to specific PATH """ + """Save XML to specific PATH""" ensure_dirs(output_path) with open(output_path, "wb") as fout: write_xml(fout, xml) @@ -120,7 +137,7 @@ def save_xml_zip(zip_path, output_path, xml): def load_txt(input_path): - with open(input_path, "r", encoding="utf-8") as fin: + with open(input_path, "r", encoding="utf-8-sig") as fin: return fin.read() @@ -144,7 +161,7 @@ def save_txt_zip(zip_path, output_path, txt): def load_json(input_path): - with open(input_path, "r", encoding="utf-8") as fin: + with open(input_path, "r", encoding="utf-8-sig") as fin: return json.load(fin, object_pairs_hook=OrderedDict) @@ -178,32 +195,46 @@ def copy_file_to_zip(zip_path, origin_path, destination_path): - Insert Title Here + {title} - - Insert Title Here Too + + {header} + {subheader} - - + + """ def save_minimal_index_html( - output_path, tokenized_xml_basename, smil_basename, audio_basename + output_path, + tokenized_xml_basename, + smil_basename, + audio_basename, + title, + header, + subheader, + theme, ): with open(output_path, "w", encoding="utf-8") as fout: fout.write( MINIMAL_INDEX_HTML_TEMPLATE.format( - tokenized_xml_basename, smil_basename, audio_basename + title=title, + text=tokenized_xml_basename, + smil=smil_basename, + audio=audio_basename, + theme=theme, + header=header, + subheader=subheader, ) ) @@ -218,7 +249,7 @@ def unicode_normalize_xml(element): def parse_time(time_string: str) -> int: - """ Parse a time stamp in h/m/s(default)/ms or any combination of these units. + """Parse a time stamp in h/m/s(default)/ms or any combination of these units. Args: time_string (str): timestamp, e.g., "0.23s", "5.234" (implied s), "1234 ms", diff --git a/readalongs/util.py b/readalongs/util.py index 4909a539..7cd3d5bb 100644 --- a/readalongs/util.py +++ b/readalongs/util.py @@ -1,6 +1,5 @@ import re from collections.abc import Iterable -from itertools import tee import click @@ -8,16 +7,16 @@ LANG_NAMES = None -def getLangsDeferred() -> Iterable: +def get_langs_deferred() -> Iterable: """Lazilly get the list of language codes supported by g2p library Yields an Iterable in such a way that the g2p database is only loaded when the results are iterated over, rather than when this function is called. """ - yield from getLangs()[0] + yield from get_langs()[0] -def getLangs(): +def get_langs(): """Get the list of language codes and names supported by the g2p library Returns: @@ -39,23 +38,22 @@ def getLangs(): import g2p.mappings.langs as g2p_langs from networkx import has_path - # LANGS_AVAILABLE in g2p lists langs inferred by the directory structure of + # langs_available in g2p lists langs inferred by the directory structure of # g2p/mappings/langs, but in ReadAlongs, we need all input languages to any mappings. # E.g., for Michif, we need to allow crg-dv and crg-tmd, but not crg, which is what - # LANGS_AVAILABLE contains. So we define our own list of languages here. - LANGS_AVAILABLE = [] + # langs_available contains. 
So we define our own list of languages here. + langs_available = [] - # Set up LANG_NAMES hash table for studio UI to - # properly name the dropdown options - LANG_NAMES = {"eng": "English"} + # this will be the set of all langs in g2p + "eng", which we need temporarily + full_lang_names = {"eng": "English"} - for k, v in g2p_langs.LANGS.items(): + for _, v in g2p_langs.LANGS.items(): for mapping in v["mappings"]: # add mapping to names hash table - LANG_NAMES[mapping["in_lang"]] = mapping["language_name"] + full_lang_names[mapping["in_lang"]] = mapping["language_name"] # add input id to all available langs list - if mapping["in_lang"] not in LANGS_AVAILABLE: - LANGS_AVAILABLE.append(mapping["in_lang"]) + if mapping["in_lang"] not in langs_available: + langs_available.append(mapping["in_lang"]) # get the key from all networks in g2p module that have a path to 'eng-arpabet', # which is needed for the readalongs @@ -63,7 +61,7 @@ def getLangs(): # Filter out *-norm and crk-no-symbols, these are just intermediate representations. LANGS = [ x - for x in LANGS_AVAILABLE + for x in langs_available if not x.endswith("-ipa") and not x.endswith("-equiv") and not x.endswith("-no-symbols") @@ -75,37 +73,60 @@ def getLangs(): LANGS += ["eng"] # Sort LANGS so the -h messages list them alphabetically LANGS = sorted(LANGS) + + # Set up LANG_NAMES hash table for studio UI to properly name the dropdown options + LANG_NAMES = {lang_code: full_lang_names[lang_code] for lang_code in LANGS} + return LANGS, LANG_NAMES -class JoinerCallback: +# For backwards compatibility, we keep the old names getLangs and getLangsDeferred around. +# For example, ReadAlongsDesktop +# (https://github.com/tobyatgithub/ReadalongsDesktop) depended on the old name, +# and even when it's updated, it'll be helpful to avoid breaking older versions. +getLangs = get_langs +getLangsDeferred = get_langs_deferred + + +class JoinerCallbackForClick: """Command-line parameter validation for multiple-value options. The values can be repeated by giving the option multiple times on the command line, or by joining them with strings matching joiner_re (colon or comma, arbitrarily mixed, by default). - Matching is case insensitive. + Matching is case insensitive iff drop_case is True. """ - def __init__(self, valid_values: Iterable, joiner_re=r"[,:]"): - self.valid_values = valid_values + def __init__(self, valid_values: Iterable, joiner_re=r"[,:]", drop_case=False): + """Get a joiner callback. + + Args: + valid_values: list of valid values for the multi-value option + joiner_re: regex for how the user may join multiple values + drop_case: when true, processed results will be converted to lowercase + """ + self.valid_values = valid_values # ***do not convert this to a list here!*** self.joiner_re = joiner_re + self.drop_case = drop_case # This signature meets the requirements of click.option's callback parameter: - def __call__(self, _ctx, _param, value_groups): - # Defer potentially expensive expansion of valid_values until we really need it. - self.valid_values, valid_values_iterator = tee(self.valid_values, 2) - lc_valid_values = [valid_value.lower() for valid_value in valid_values_iterator] + def __call__(self, _ctx=None, _param=None, value_groups=()): + # Potentially expensive expansion actually required here, so do it now.
+ self.valid_values = list(self.valid_values) + if self.drop_case: + self.valid_values = [value.lower() for value in self.valid_values] results = [ value.strip() for value_group in value_groups for value in re.split(self.joiner_re, value_group) ] + if self.drop_case: + results = [value.lower() for value in results] for value in results: - if value.lower() not in lc_valid_values: + if value not in self.valid_values: raise click.BadParameter( - f"'{value}' is not one of {self.quoted_list(lc_valid_values)}." + f"'{value}' is not one of {self.quoted_list(self.valid_values)}." ) return results @@ -113,3 +134,20 @@ def __call__(self, _ctx, _param, value_groups): def quoted_list(values): """Display a list of values quoted, for easy reading in error messages.""" return ", ".join("'" + v + "'" for v in values) + + +def get_obsolete_callback_for_click(message): + """Click callback for telling the user an option is obsolete in a helpful way. + + Args: + message (str): message telling the user what the option is replaced by + """ + + def _callback(_ctx, param, value_groups): + if value_groups: + joiner = "' / '" + raise click.BadParameter( + f"The '{joiner.join(param.opts)}' option is obsolete.\n" + message + ) + + return _callback diff --git a/readalongs/views.py b/readalongs/views.py index 3852ac00..9facdef6 100644 --- a/readalongs/views.py +++ b/readalongs/views.py @@ -1,30 +1,25 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -####################################################################### -# -# views.py -# -# Views for ReadAlong Studio web application -# Interactions are described as websocket events and responses -# Corresponding JavaScript is found in readalongs/static/js/main.js -# -####################################################################### +""" +views.py: Views for ReadAlong Studio web application + +Interactions are described as websocket events and responses +Corresponding JavaScript is found in readalongs/static/js/main.js +""" import io import os +import re from datetime import datetime from pathlib import Path -from subprocess import run from tempfile import mkdtemp from zipfile import ZipFile from flask import abort, redirect, render_template, request, send_file, session, url_for from flask_socketio import emit +from readalongs.api import align from readalongs.app import app, socketio from readalongs.log import LOGGER -from readalongs.util import getLangs +from readalongs.util import get_langs ALLOWED_TEXT = ["txt", "xml", "docx"] ALLOWED_AUDIO = ["wav", "mp3"] @@ -44,6 +39,17 @@ def allowed_file(filename: str) -> bool: return "." in filename and filename.rsplit(".", 1)[1].lower() in ALLOWED_EXTENSIONS +def safe_decode(byte_seq: bytes) -> str: + """Convert byte_seq to str if it's valid utf8, otherwise return its str rep + + Does not raise any exceptions: non-utf8 inputs will yield escaped specials. 
+ """ + try: + return byte_seq.decode() + except UnicodeDecodeError: + return str(byte_seq) + + def uploaded_files(dir_path: str) -> dict: """Returns all files that have been uploaded @@ -87,7 +93,7 @@ def update_session_config(**kwargs) -> dict: @app.route("/") def home(): - """ Home View - go to Step 1 which is for uploading files """ + """Home View - go to Step 1 which is for uploading files""" return redirect(url_for("steps", step=1)) @@ -133,14 +139,20 @@ def remove_file(): return redirect(url_for("steps", step=1)) +def option_to_kwargs(option: str) -> str: + if option[0:2] == "--": + option = option[2:] + return option.replace("-", "_") + + @app.route("/step/") def steps(step): - """ Go through steps """ + """Go through steps""" if step == 1: session.clear() session["temp_dir"] = mkdtemp() temp_dir = session["temp_dir"] - langs, lang_names = getLangs() + langs, lang_names = get_langs() return render_template( "upload.html", uploaded=uploaded_files(temp_dir), @@ -150,35 +162,47 @@ def steps(step): return render_template("preview.html") elif step == 3: if "audio" not in session or "text" not in session: - log = "Sorry, it looks like something is wrong with your audio or text. Please try again" + log = "Sorry, it looks like something is wrong with your audio or text. Please try again." + data = {"log": log} + elif session["text"].endswith("txt") and not session.get("config", {}).get( + "lang" + ): + log = "Sorry, the language setting is required for plain text files. Please try again." + data = {"log": log} else: - flags = ["--force-overwrite"] - for option in ["--closed-captioning", "--save-temps", "--text-grid"]: - if session["config"].get(option, False): - flags.append(option) + kwargs = dict() + kwargs["force_overwrite"] = True + kwargs["save_temps"] = session["config"].get("--save-temps", False) + kwargs["output_formats"] = [] + if session["config"].get("--closed-captioning", False): + kwargs["output_formats"].append("srt") + if session["config"].get("--text-grid", False): + kwargs["output_formats"].append("TextGrid") if session["text"].endswith("txt"): - flags.append("--text-input") - flags.append("--language") - flags.append(session["config"]["lang"]) + kwargs["language"] = [session["config"]["lang"]] + timestamp = str(int(datetime.now().timestamp())) output_base = "aligned" + timestamp - args = ( - ["readalongs", "align"] - + flags - + [ - session["text"], - session["audio"], - os.path.join(session["temp_dir"], output_base), - ] - ) - LOGGER.warning(args) + + kwargs["textfile"] = session["text"] + kwargs["audiofile"] = session["audio"] + kwargs["output_base"] = os.path.join(session["temp_dir"], output_base) + LOGGER.info(kwargs) + _, audio_ext = os.path.splitext(session["audio"]) data = {"audio_ext": audio_ext, "base": output_base} + (status, exception, log_text) = align(**kwargs) + status_text = "OK" if status == 0 else "Error" if session["config"].get("show-log", False): - log = run(args, capture_output=True, check=False) - data["log"] = log + data["log"] = f"Status: {status_text}" + if exception: + data["log"] += f"; Exception: {exception!r}" + data["log_lines"] = list(re.split(r"\r?\n", log_text)) else: - run(args, check=False) + if status != 0 or exception: + # Always display errors, even when logs are not requested + data["log"] = f"Status: {status_text}; Exception: {exception!r}" + data["audio_path"] = os.path.join( session["temp_dir"], output_base, output_base + audio_ext ) @@ -213,7 +237,7 @@ def show_zip(base): with ZipFile(data, mode="w") as z: for fname in 
files_to_download: path = os.path.join(session["temp_dir"], base, fname) - if fname.startswith("aligned"): + if fname.startswith("aligned") or fname == "index.html": z.write(path, fname) data.seek(0) @@ -231,7 +255,7 @@ def show_zip(base): @app.route("/file/", methods=["GET"]) def return_temp_file(fname): fn, _ = os.path.splitext(fname) - LOGGER.warning(session["temp_dir"]) + LOGGER.info(session["temp_dir"]) path = os.path.join(session["temp_dir"], fn, fname) if os.path.exists(path): return send_file(path) diff --git a/readalongs/waveform2svg/audio_util.py b/readalongs/waveform2svg/audio_util.py index 46a4c24f..878b82bc 100644 --- a/readalongs/waveform2svg/audio_util.py +++ b/readalongs/waveform2svg/audio_util.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python # -*- coding: utf-8 -*- ################################################### @@ -29,7 +29,7 @@ def smooth(x, window_size=5): - """ Smooth the waveform to look... well, smooth """ + """Smooth the waveform to look... well, smooth""" if window_size < 3: return x s = np.r_[2 * x[0] - x[window_size - 1 :: -1], x, 2 * x[-1] - x[-1:-window_size:-1]] @@ -39,8 +39,8 @@ def smooth(x, window_size=5): def load_smil(input_path): - """ Get the bucketed max and min value from a sequence of WAV files as - expressed in a SMIL document """ + """Get the bucketed max and min value from a sequence of WAV files as + expressed in a SMIL document""" xml = load_xml(input_path) dirname = os.path.dirname(input_path) data = None diff --git a/readalongs/waveform2svg/make_all_svgs.py b/readalongs/waveform2svg/make_all_svgs.py index cb5bcca1..972fec57 100644 --- a/readalongs/waveform2svg/make_all_svgs.py +++ b/readalongs/waveform2svg/make_all_svgs.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python # -*- coding: utf-8 -*- ######################################################## @@ -12,7 +12,6 @@ from __future__ import absolute_import, division, print_function, unicode_literals import argparse -from io import open from audio_util import save_txt from pitch2svg import make_pitch_svg diff --git a/readalongs/waveform2svg/pitch2svg.py b/readalongs/waveform2svg/pitch2svg.py index 1128617c..4880f0db 100644 --- a/readalongs/waveform2svg/pitch2svg.py +++ b/readalongs/waveform2svg/pitch2svg.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python # -*- coding: utf-8 -*- ################################################### @@ -17,15 +17,11 @@ from __future__ import absolute_import, division, print_function, unicode_literals import argparse -import os -from io import open from math import floor import chevron import librosa -import numpy as np -from readalongs.log import LOGGER from readalongs.waveform2svg.audio_util import ( SAMPLE_RATE, load_wav_or_smil, diff --git a/readalongs/waveform2svg/units2svg.py b/readalongs/waveform2svg/units2svg.py index 054da3fd..d5320adf 100644 --- a/readalongs/waveform2svg/units2svg.py +++ b/readalongs/waveform2svg/units2svg.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python # -*- coding: utf-8 -*- ################################################### @@ -19,15 +19,12 @@ import argparse import os from collections import OrderedDict -from io import open import chevron import librosa -import numpy as np from audio_util import save_txt from lxml import etree -from readalongs.log import LOGGER from readalongs.text.util import xpath_default FMIN = 80 diff --git a/readalongs/waveform2svg/waveform2svg.py b/readalongs/waveform2svg/waveform2svg.py index cf67cd23..2098b597 100644 --- 
a/readalongs/waveform2svg/waveform2svg.py +++ b/readalongs/waveform2svg/waveform2svg.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python # -*- coding: utf-8 -*- ################################################### @@ -14,14 +14,11 @@ from __future__ import absolute_import, division, print_function, unicode_literals import argparse -import os -from io import open from math import ceil, floor import chevron import numpy as np -from readalongs.log import LOGGER from readalongs.waveform2svg.audio_util import load_wav_or_smil, save_txt, smooth SVG_TEMPLATE = """ diff --git a/readalongs/web_api.py b/readalongs/web_api.py new file mode 100644 index 00000000..44ac68d2 --- /dev/null +++ b/readalongs/web_api.py @@ -0,0 +1,428 @@ +""" +REST-ish Web API for ReadAlongs Studio text manipulation operations using FastAPI. + +See https://readalong-studio.herokuapp.com/api/v1/docs for the documentation. + +You can spin up this Web API for development purposes with: + cd readalongs/ + PRODUCTION= uvicorn readalongs.web_api:web_api_app --reload +- The --reload switch will watch for changes under the directory where it's + running and reload the code whenever it changes, so it's best run in readalongs/ +- PRODUCTION= tells the API app to run in non-production mode, i.e., in debug mode, + and automatically add the header "access-control-allow-origin: *" to each + response so you won't get CORS errors using this locally with Studio-Web. + +You can also spin up the API with a production-grade server (on Linux, not Windows) using gunicorn: + gunicorn -w 4 -k uvicorn.workers.UvicornWorker readalongs.web_api:web_api_app + +Once spun up, the documentation and API playground will be visible at +http://localhost:8000/api/v1/docs +""" + +import io +import os +import tempfile +from enum import Enum +from textwrap import dedent +from typing import Dict, List, Optional, Union + +from fastapi import Body, FastAPI, HTTPException +from fastapi.middleware.cors import CORSMiddleware +from fastapi.responses import FileResponse +from lxml import etree +from pydantic import BaseModel, Field +from starlette.background import BackgroundTask + +from readalongs.align import create_tei_from_text, save_label_files, save_subtitles +from readalongs.log import LOGGER +from readalongs.text.add_ids_to_xml import add_ids +from readalongs.text.convert_xml import convert_xml +from readalongs.text.make_dict import make_dict_object +from readalongs.text.make_fsg import make_jsgf +from readalongs.text.make_smil import parse_smil +from readalongs.text.tokenize_xml import tokenize_xml +from readalongs.util import get_langs + +# Create the app +web_api_app = FastAPI() +# Create the v1 version of the API +v1 = FastAPI() +# Call get_langs() when the server loads to load the languages into memory +LANGS = get_langs() + +if os.getenv("PRODUCTION", True): + origins = [ + "https://readalong-studio.mothertongues.org", + ] # Allow requests from mt app +else: + origins = ["*"] # Allow requests from any origin +web_api_app.add_middleware( + CORSMiddleware, + allow_origins=origins, + allow_credentials=True, + allow_methods=["GET", "POST"], + allow_headers=["*"], +) + + +class RequestBase(BaseModel): + """Base request for assemble""" + + text_languages: List[str] + debug: bool = False + + +class PlainTextRequest(RequestBase): + """Request to assemble with input as plain text""" + + text: str + + +class XMLRequest(RequestBase): + """Request to assemble with input as XML""" + + xml: str + + +class AssembleResponse(BaseModel): + """Response from assemble with the XML
prepared and the rest.""" + + lexicon: Dict[str, str] # A dictionary of the form {word_id: pronunciation} + jsgf: str # The JSGF-formatted grammar in plain text + text_ids: str # The text ID input for the decoder in plain text + processed_xml: str # The processed XML is returned as a string + input: Optional[Union[XMLRequest, PlainTextRequest]] + parsed: Optional[str] + tokenized: Optional[str] + g2ped: Optional[str] + + +@v1.get("/langs", response_model=Dict[str, str]) +async def langs() -> Dict[str, str]: + """Return the list of supported languages and their names as a dict. + + Returns: + langs as dict with language codes as keys and the full language name as + values, e.g.: + `{ + "alq": "Algonquin", + "atj": "Atikamekw", + "lc3": "Third Language Name", + ... + }` + """ + + return LANGS[1] + + +@v1.post("/assemble", response_model=AssembleResponse) +async def assemble( + request: Union[XMLRequest, PlainTextRequest] = Body( + examples={ + "text": { + "summary": "A basic example with plain text input", + "value": { + "text": "hej verden", + "text_languages": ["dan", "und"], + "debug": False, + }, + }, + "xml": { + "summary": "A basic example with xml input", + "value": { + "xml": "

hej verden

", + "text_languages": ["dan", "und"], + "debug": False, + }, + }, + } + ) +): + """Create an input TEI from the given text (as plain text or XML). + Also creates the required grammar, pronunciation dictionary, + and text needed by the decoder. + + Encoding: all input and output is in UTF-8. + + Args (as dict items in the request body): + - text_languages: the list of languages for g2p processing + - debug: set to true for debugging (default: False) + - either text or xml: + - text: the input text as plain text + - xml: the input text as a readalongs-compatible XML structure + + Returns (as dict items in the response body): + - lexicon: maps word IDs to their pronunciation + - jsgf: grammar for the forced aligner + - text_ids: the list of word_ids as a space-separated string + - processed_xml: the XML with all the readalongs info in it + """ + + if isinstance(request, XMLRequest): + try: + parsed = etree.fromstring(bytes(request.xml, encoding="utf-8")) + except etree.XMLSyntaxError as e: + raise HTTPException( + status_code=422, detail="XML provided is not valid" + ) from e + elif isinstance(request, PlainTextRequest): + parsed = io.StringIO(request.text).readlines() + parsed = etree.fromstring( + bytes( + create_tei_from_text(parsed, text_languages=request.text_languages), + encoding="utf-8", + ) + ) + # tokenize + tokenized = tokenize_xml(parsed) + # add ids + ids_added = add_ids(tokenized) + # g2p + g2ped, valid = convert_xml(ids_added) + if not valid: + raise HTTPException( + status_code=422, + detail="g2p could not be performed, please check your text or your language code", + ) + # create grammar + dict_data, jsgf, text_input = create_grammar(g2ped) + response = { + "lexicon": dict_data, + "jsgf": jsgf, + "text_ids": text_input, + "processed_xml": etree.tostring(g2ped, encoding="utf8").decode(), + } + + if request.debug: + response["input"] = request.dict() + response["parsed"] = etree.tostring(parsed, encoding="utf8") + response["tokenized"] = etree.tostring(tokenized, encoding="utf8") + response["g2ped"] = etree.tostring(g2ped, encoding="utf8") + return response + + +def create_grammar(xml): + """Create the grammar and dictionary data from w elements in the given XML""" + + word_elements = xml.xpath("//w") + dict_data = make_dict_object(word_elements) + fsg_data = make_jsgf(word_elements, filename="test") + text_data = " ".join(xml.xpath("//w/@id")) + return dict_data, fsg_data, text_data + + +class FormatName(Enum): + """The different formats supported to represent readalong alignments""" + + TEXTGRID = "textgrid" # Praat TextGrid format + EAF = "eaf" # ELAN EAF format + SRT = "srt" # SRT subtitle format + VTT = "vtt" # VTT subtitle format + + +class ConvertRequest(BaseModel): + """Convert Request contains the RAS-processed XML and SMIL alignments""" + + audio_duration: float = Field( + example=2.01, + gt=0.0, + title="The duration of the audio used to create the alignment, in seconds.", + ) + + xml: str = Field( + title="The processed_xml returned by /assemble.", + example=dedent( + """\ + + + + +
+

+ hej verden +

+
+ +
+
""" + ), + ) + + smil: str = Field( + title="The result of aligning xml to the audio with SoundSwallower(.js)", + example=dedent( + """\ + + + + + + + + + + """ + ), + ) + + +class SubtitleTier(Enum): + """Which tier of the alignment information is returned""" + + SENTENCE = "sentence" + WORD = "word" + + +@v1.post("/convert_alignment/{output_format}") +async def convert_alignment( # noqa: C901 + request: ConvertRequest, + output_format: FormatName, + tier: Union[SubtitleTier, None] = None, +) -> FileResponse: + """Convert an alignment to a different format. + + Encoding: all input and output is in UTF-8. + + Path Parameter: + - output_format: Format to convert to, one of textgrid (Praat TextGrid), + eaf (ELAN EAF), srt (SRT subtitles), or vtt (VTT subtitles). + + Query Parameter: + - tier: for srt and vtt outputs, whether the subtitles should be at the + sentence (this is the default) or word level. + + Args (as dict items in the request body): + - audio_duration: duration in seconds of the audio file used to create the alignment + - xml: the XML file produced by /assemble + - smil: the SMIL file produced by SoundSwallower(.js) + + Formats supported: + - TextGrid: Praat TextGrid file format + - eaf: ELAN eaf file format + - srt: SRT subtitle format (at the sentence or word level, based on tier) + - vtt: WebVTT subtitle format (at the sentence or word level, based on tier) + + Data privacy consideration: due to limitations of the libraries used to perform + some of these conversions, the output files will be temporarily stored on disk, + but they get deleted immediately as this endpoint returns its output or reports + any error. + + Returns: a file in the format requested + """ + try: + parsed_xml = etree.fromstring(bytes(request.xml, encoding="utf-8")) + except etree.XMLSyntaxError as e: + raise HTTPException(status_code=422, detail="XML provided is not valid") from e + + try: + words = parse_smil(request.smil) + except ValueError as e: + raise HTTPException(status_code=422, detail="SMIL provided is not valid") from e + + # Data privacy consideration: we have to make sure this temporary directory gets + # deleted after the call returns, as we promise in the API documentation. 
+ temp_dir_object = tempfile.TemporaryDirectory() + temp_dir_name = temp_dir_object.name + cleanup = BackgroundTask(temp_dir_object.cleanup) + prefix = os.path.join(temp_dir_name, "aligned") + LOGGER.info("Temporary directory: %s", temp_dir_name) + + try: + if output_format == FormatName.TEXTGRID: + try: + save_label_files( + words, parsed_xml, request.audio_duration, prefix, "textgrid" + ) + except Exception as e: + raise HTTPException( + status_code=422, + detail="XML+SMIL file pair provided cannot be converted", + ) from e + return FileResponse( + prefix + ".TextGrid", + background=cleanup, + media_type="text/plain", + filename="aligned.TextGrid", + ) + + elif output_format == FormatName.EAF: + try: + save_label_files( + words, parsed_xml, request.audio_duration, prefix, "eaf" + ) + except Exception as e: + raise HTTPException( + status_code=422, + detail="XML+SMIL file pair provided cannot be converted", + ) from e + return FileResponse( + prefix + ".eaf", + background=cleanup, + media_type="text/xml", + filename="aligned.eaf", + ) + + elif output_format == FormatName.SRT: + try: + save_subtitles(words, parsed_xml, prefix, "srt") + except Exception as e: + raise HTTPException( + status_code=422, + detail="XML+SMIL file pair provided cannot be converted", + ) from e + if tier == SubtitleTier.WORD: + return FileResponse( + prefix + "_words.srt", + background=cleanup, + media_type="text/plain", + filename="aligned_words.srt", + ) + else: + return FileResponse( + prefix + "_sentences.srt", + background=cleanup, + media_type="text/plain", + filename="aligned_sentences.srt", + ) + + elif output_format == FormatName.VTT: + try: + save_subtitles(words, parsed_xml, prefix, "vtt") + except Exception as e: + raise HTTPException( + status_code=422, + detail="XML+SMIL file pair provided cannot be converted", + ) from e + if tier == SubtitleTier.WORD: + return FileResponse( + prefix + "_words.vtt", + background=cleanup, + media_type="text/plain", + filename="aligned_words.vtt", + ) + else: + return FileResponse( + prefix + "_sentences.vtt", + background=cleanup, + media_type="text/plain", + filename="aligned_sentences.vtt", + ) + + else: + raise HTTPException( + status_code=500, + detail="If this happens, FastAPI Enum validation didn't work so this is a bug!", + ) + + except Exception: + # We don't normally use such a global exception, but in this case we really + # need to make sure the temporary directory is cleaned up, so this except + # catches any and all problems and wipes the temporary data + temp_dir_object.cleanup() + raise + + +# Mount the v1 version of the API to the root of the app +web_api_app.mount("/api/v1", v1) diff --git a/requirements.api.txt b/requirements.api.txt new file mode 100644 index 00000000..05f56516 --- /dev/null +++ b/requirements.api.txt @@ -0,0 +1,3 @@ +# These are dependencies required by the production Web API +gunicorn +uvicorn diff --git a/requirements.ci.txt b/requirements.ci.txt new file mode 100644 index 00000000..def6f9e9 --- /dev/null +++ b/requirements.ci.txt @@ -0,0 +1,5 @@ +# These are dependencies required by our continuous integration and testing pipelines +codecov +coverage +pip-licenses +-r requirements.api.txt diff --git a/requirements.dev.txt b/requirements.dev.txt index 98ecdc63..668a6356 100644 --- a/requirements.dev.txt +++ b/requirements.dev.txt @@ -1,4 +1,11 @@ +# This is a set of development dependencies +black~=22.0 +flake8>=4.0.1 +gitlint-core==0.17.0 +isort>=5.10.1 +mypy>=0.941 pre-commit>=2.6.0 -black==19.10b0 -flake8>=3.8.3 -isort>=5.4.2 
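+# Type stubs so mypy can type-check code that uses these libraries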
+types-python-slugify>=5.0.3 +types-pyyaml>=6.0.5 +types-requests>=2.27.11 +types-setuptools>=57.4.9 diff --git a/requirements.min.txt b/requirements.min.txt new file mode 100644 index 00000000..6a20acf0 --- /dev/null +++ b/requirements.min.txt @@ -0,0 +1,18 @@ +# This is the minimal set of dependencies required for the readalongs package +chevron==0.14.0 +click==8.0.4 +coloredlogs==10.0 +fastapi==0.78.0 +Flask>=2.0.0 +Flask-Session==0.3.2 +flask-socketio==4.3.2 +g2p>=0.5.20210825 +lxml==4.9.1 +networkx==2.5 +numpy>=1.16.4 +pydub==0.23.1 +pympi-ling==1.69 +python-slugify==5.0.0 +soundswallower~=0.4.1 +webvtt-py==0.4.2 +werkzeug==2.0.3 diff --git a/requirements.txt b/requirements.txt index aa4d610e..89c49607 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,17 +1,4 @@ -chevron==0.14.0 -coloredlogs==10.0 -Flask>=2.0.0 -Flask-Session==0.3.2 -flask-socketio==4.3.2 -g2p>=0.5.20210825 -lxml==4.6.5 -networkx==2.5 -numpy>=1.16.4 -panphon>=0.14 -soundswallower==0.1.1 -pydub==0.23.1 -pympi-ling==1.69 -python-slugify==5.0.0 -six==1.12.0 -tqdm==4.31.1 -webvtt-py==0.4.2 +# Heroku can only read requirements from "requirements.txt", +# so we separate the minimal library from the rest of the dependencies. +-r requirements.min.txt +-r requirements.api.txt diff --git a/run.py b/run.py index 8cdf4944..a0f8be30 100644 --- a/run.py +++ b/run.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python """ Run ReadAlong Studio as web application diff --git a/setup.cfg b/setup.cfg index 5f571805..8d4c021f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -9,3 +9,9 @@ ensure_newline_before_comments=True [mypy] ignore_missing_imports = True + +[flake8] +ignore = E203, E266, E501, W503 +max-line-length = 88 +max-complexity = 18 +select = B,C,E,F,W,T4,B9 diff --git a/setup.py b/setup.py index d5e7d174..4ae97624 100644 --- a/setup.py +++ b/setup.py @@ -9,20 +9,28 @@ version_path = os.path.join(os.path.dirname(readalongs.__file__), "_version.py") VERSION = readalongs.VERSION + "." 
+ build_no -with open(version_path, "w") as f: +with open(version_path, "w", newline="\n", encoding="utf-8") as f: print(f'__version__ = "{VERSION}"', file=f) -with open("requirements.txt") as f: +with open("requirements.min.txt") as f: required = f.read().splitlines() setup( name="readalongs", + license="MIT", python_requires=">=3.7", version=VERSION, - long_description="ReadAlong Studio", + description="ReadAlong Studio", + long_description="ReadAlong Studio, audiobook alignment for Indigenous languages", + platforms=["any"], packages=find_packages(exclude=["test"]), include_package_data=True, zip_safe=False, install_requires=required, entry_points={"console_scripts": ["readalongs = readalongs.cli:cli"]}, + classifiers=[ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + ], ) diff --git a/test/basic_test_case.py b/test/basic_test_case.py index 3542a59b..8889c30d 100644 --- a/test/basic_test_case.py +++ b/test/basic_test_case.py @@ -1,7 +1,7 @@ """Common base class for the ReadAlongs test suites""" -import os import tempfile +from pathlib import Path from unittest import TestCase from readalongs.app import app @@ -10,19 +10,33 @@ class BasicTestCase(TestCase): """A Basic Unittest build block class that comes bundled with - a temporary directory (tempdir), and access to an app runner - (self.runner) + a temporary directory (self.tempdir), the path to the test data (self.data_dir), + and access to an app runner (self.runner) + + For convenience, self.tempdir and self.data_dir are pathlib.Path objects + that can be used either with os.path functions or the shorter Path operators. + E.g., these two lines are equivalent: + text_file = os.path.join(self.data_dir, "ej-fra.txt") + text_file = self.data_dir / "ej-fra.txt" """ LOGGER.setLevel("DEBUG") - data_dir = os.path.join(os.path.dirname(__file__), "data") + data_dir = Path(__file__).parent / "data" # Set this to True to keep the temp dirs after running, for manual inspection # but please don't push a commit setting this to True! + # To keep temp dirs for just one subclass, add this line to its setUp() + # function, before the call to super().setUp(): + # self.keep_temp_dir_after_running = True keep_temp_dir_after_running = False def setUp(self): - """Create a temporary directory, self.tempdir, and a test runner, self.runner""" + """Create a temporary directory, self.tempdir, and a test runner, self.runner + + If a subclass needs its own setUp() function, make sure to call + super().setUp() + at the beginning of it. + """ app.logger.setLevel("DEBUG") self.runner = app.test_cli_runner() tempdir_prefix = f"tmpdir_{type(self).__name__}_" @@ -35,8 +49,14 @@ def setUp(self): # Alternative tempdir code keeps it after running, for manual inspection: self.tempdir = tempfile.mkdtemp(prefix=tempdir_prefix, dir=".") print("tmpdir={}".format(self.tempdir)) + self.tempdir = Path(self.tempdir) def tearDown(self): - """Clean up the temporary directory""" + """Clean up the temporary directory + + If a subclass needs its own tearDown() function, make sure to call + super().tearDown() + at the end of it. + """ if not self.keep_temp_dir_after_running: self.tempdirobj.cleanup() diff --git a/test/data/ej-fra-anchors.xml b/test/data/ej-fra-anchors.xml index 5ac95e85..8200e723 100644 --- a/test/data/ej-fra-anchors.xml +++ b/test/data/ej-fra-anchors.xml @@ -1,8 +1,5 @@ -
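Once the app above is mounted and served, a client can exercise the conversion endpoint over plain HTTP. A hedged sketch of such a call (the host and port are assumptions based on common uvicorn defaults; the request shape matches the convert_alignment docstring above and the test_web_api.py cases further down):

import requests

payload = {
    "audio_duration": 83.1,  # seconds of audio behind the alignment
    "xml": open("aligned.xml", encoding="utf-8").read(),    # output of /assemble
    "smil": open("aligned.smil", encoding="utf-8").read(),  # output of SoundSwallower(.js)
}
# tier only applies to srt and vtt outputs; "sentence" is the default
response = requests.post(
    "http://localhost:8000/api/v1/convert_alignment/srt",
    params={"tier": "word"},
    json=payload,
)
response.raise_for_status()
with open("aligned_words.srt", "wb") as f:
    f.write(response.content)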
diff --git a/test/data/ej-fra-anchors2.xml b/test/data/ej-fra-anchors2.xml index ce7af6c2..32573ff9 100644 --- a/test/data/ej-fra-anchors2.xml +++ b/test/data/ej-fra-anchors2.xml @@ -1,8 +1,5 @@ - diff --git a/test/data/ej-fra-converted.xhtml b/test/data/ej-fra-converted.xhtml index 0eaf0592..c4c746a4 100644 --- a/test/data/ej-fra-converted.xhtml +++ b/test/data/ej-fra-converted.xhtml @@ -1,9 +1,6 @@ - Book - + Book

diff --git a/test/data/ej-fra-converted.xml b/test/data/ej-fra-converted.xml index 0988f873..616210c1 100644 --- a/test/data/ej-fra-converted.xml +++ b/test/data/ej-fra-converted.xml @@ -1,8 +1,5 @@ -

diff --git a/test/data/ej-fra-dna.xml b/test/data/ej-fra-dna.xml index b581c7da..a1fe1e53 100644 --- a/test/data/ej-fra-dna.xml +++ b/test/data/ej-fra-dna.xml @@ -1,8 +1,5 @@ -
diff --git a/test/data/ej-fra-package.xml b/test/data/ej-fra-package.xml index 1adc6910..188a478f 100644 --- a/test/data/ej-fra-package.xml +++ b/test/data/ej-fra-package.xml @@ -1,8 +1,5 @@ -
diff --git a/test/data/ej-fra-silence-bad.xml b/test/data/ej-fra-silence-bad.xml index b069aa96..e34159ef 100644 --- a/test/data/ej-fra-silence-bad.xml +++ b/test/data/ej-fra-silence-bad.xml @@ -1,8 +1,5 @@ -
diff --git a/test/data/ej-fra-silence.xml b/test/data/ej-fra-silence.xml index 416e99a4..dac1e681 100644 --- a/test/data/ej-fra-silence.xml +++ b/test/data/ej-fra-silence.xml @@ -1,8 +1,5 @@ -
diff --git a/test/data/ej-fra.xml b/test/data/ej-fra.xml index 76ee1230..94bfe357 100644 --- a/test/data/ej-fra.xml +++ b/test/data/ej-fra.xml @@ -1,8 +1,5 @@ -
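The /assemble endpoint these fixtures feed can also be driven in-process, without a server, through FastAPI's TestClient. A minimal sketch mirroring the test_web_api.py cases below (the French sample sentence is an assumption for illustration):

from fastapi.testclient import TestClient

from readalongs.web_api import web_api_app

client = TestClient(web_api_app)
response = client.post(
    "/api/v1/assemble",
    json={"text": "Ceci est un test.", "text_languages": ["fra"]},
)
assert response.status_code == 200
# response.json() carries the assembled inputs the aligner needs next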
diff --git a/test/data/fra-prepared.xml b/test/data/fra-prepared.xml index 8eaf24a6..12a19f30 100644 --- a/test/data/fra-prepared.xml +++ b/test/data/fra-prepared.xml @@ -1,8 +1,5 @@ -
diff --git a/test/data/fra-tokenized.xml b/test/data/fra-tokenized.xml index 894347ce..7b1eee82 100644 --- a/test/data/fra-tokenized.xml +++ b/test/data/fra-tokenized.xml @@ -1,8 +1,5 @@ -
diff --git a/test/data/mixed-langs.g2p.xml b/test/data/mixed-langs.g2p.xml index 215e4859..6463c660 100644 --- a/test/data/mixed-langs.g2p.xml +++ b/test/data/mixed-langs.g2p.xml @@ -1,8 +1,5 @@ -
diff --git a/test/data/mixed-langs.tokenized.xml b/test/data/mixed-langs.tokenized.xml index 992c133f..74c1d8da 100644 --- a/test/data/mixed-langs.tokenized.xml +++ b/test/data/mixed-langs.tokenized.xml @@ -1,8 +1,5 @@ -
diff --git a/test/data/mixed-langs.xml b/test/data/mixed-langs.xml index 09606424..083c1b22 100644 --- a/test/data/mixed-langs.xml +++ b/test/data/mixed-langs.xml @@ -1,8 +1,5 @@ -
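For orientation alongside the SMIL handling above and the test_smil.py suite below, here is an illustrative fragment in the shape parse_smil consumes, with a simplified stand-in parser. The namespace and element layout are assumptions based on SMIL 3.0; the authoritative implementation lives in readalongs.text.make_smil and raises ValueError on malformed input:

from lxml import etree

SAMPLE_SMIL = """\
<smil xmlns="http://www.w3.org/ns/SMIL" version="3.0">
    <body>
        <par id="par-w1">
            <text src="my_text_path#w1"/>
            <audio src="my_audio_path" clipBegin="0.01" clipEnd="0.75"/>
        </par>
    </body>
</smil>
"""

def parse_smil_sketch(smil_text: str) -> list:
    ns = {"smil": "http://www.w3.org/ns/SMIL"}
    root = etree.fromstring(smil_text.encode("utf-8"))
    words = []
    for par in root.findall(".//smil:par", namespaces=ns):
        text_el = par.find("smil:text", namespaces=ns)
        audio_el = par.find("smil:audio", namespaces=ns)
        words.append({
            "id": text_el.get("src").split("#")[1],     # word id after the '#'
            "start": float(audio_el.get("clipBegin")),  # seconds
            "end": float(audio_el.get("clipEnd")),
        })
    return words

print(parse_smil_sketch(SAMPLE_SMIL))
# [{'id': 'w1', 'start': 0.01, 'end': 0.75}] -- the shape test_smil.py expects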
diff --git a/test/data/patrickxtlan.xml b/test/data/patrickxtlan.xml index 3ae19c77..9456a6e6 100644 --- a/test/data/patrickxtlan.xml +++ b/test/data/patrickxtlan.xml @@ -3,5 +3,6 @@

Patrickxtła̱n Patrickxtła̱n + fooPatrickbarxtła̱nbaz

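The new patrickxtła̱n test line mixes free text (foo, bar, baz) with nested sub-word elements, the case exercised by test_convert_xml_subwords in test_g2p_cli.py below. A sketch of how such mixed content can be flattened into one word string, consistent with the get_word_text() expectations in test_misc.py (an illustration, not the readalongs implementation):

from lxml import etree

def gather_word_text(word_el) -> str:
    # itertext() yields the element's own text plus the text and tails of
    # all nested elements, but not the tail of the word element itself.
    return "".join(word_el.itertext())

print(gather_word_text(etree.fromstring("<w>text<subw>sub</subw>tail</w>")))  # textsubtail
print(gather_word_text(etree.fromstring("<w><syl>syl1</syl><syl>syl2</syl></w>")))  # syl1syl2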
diff --git a/test/data/sample-config.json b/test/data/sample-config.json index 93b1e7e6..df3537c6 100644 --- a/test/data/sample-config.json +++ b/test/data/sample-config.json @@ -1,4 +1,9 @@ { + "title": "My awesome read-along", + "header": "A story in my language", + "subheader": "Read by me", + "theme": "light", + "images": { "0": "image-for-page1.jpg", diff --git a/test/run.py b/test/run.py index 9869572e..6efb1d0d 100755 --- a/test/run.py +++ b/test/run.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python """ Top-level runner for our test suites @@ -14,38 +14,43 @@ """ import os +import re import sys from unittest import TestLoader, TestSuite, TextTestRunner from test_align_cli import TestAlignCli from test_anchors import TestAnchors +from test_api import TestAlignApi from test_audio import TestAudio from test_config import TestConfig from test_dna_text import TestDNAText from test_dna_utils import TestDNAUtils from test_force_align import TestForceAlignment, TestXHTML from test_g2p_cli import TestG2pCli -from test_indices import TestIndices +from test_make_xml_cli import TestMakeXMLCli from test_misc import TestMisc from test_package_urls import TestPackageURLs -from test_prepare_cli import TestPrepareCli from test_silence import TestSilence from test_temp_file import TestTempFile from test_tokenize_cli import TestTokenizeCli from test_tokenize_xml import TestTokenizer +from test_web_api import TestWebApi +from test_smil import TestSmilUtilities from readalongs.log import LOGGER -loader = TestLoader() +LOADER = TestLoader() e2e_tests = [ - loader.loadTestsFromTestCase(test) for test in (TestForceAlignment, TestXHTML) + LOADER.loadTestsFromTestCase(test) for test in (TestForceAlignment, TestXHTML) ] -indices_tests = [loader.loadTestsFromTestCase(test) for test in [TestIndices]] +api_tests = [ + LOADER.loadTestsFromTestCase(test) for test in [TestWebApi] +] # TODO: add some load testing with https://locust.io/ other_tests = [ - loader.loadTestsFromTestCase(test) + LOADER.loadTestsFromTestCase(test) for test in [ TestAnchors, TestConfig, @@ -54,43 +59,84 @@ TestTokenizer, TestTokenizeCli, TestTempFile, - TestPrepareCli, + TestMakeXMLCli, TestAudio, TestAlignCli, + TestAlignApi, TestG2pCli, TestMisc, TestSilence, + TestSmilUtilities, TestPackageURLs, + TestWebApi, ] ] -def run_tests(suite): - """Run the specified test suite""" +def list_tests(suite: TestSuite): + for subsuite in suite: + for match in re.finditer(r"tests=\[([^][]+)\]>", str(subsuite)): + yield from match[1].split(", ") + + +def describe_suite(suite: TestSuite): + full_suite = LOADER.discover(os.path.dirname(__file__)) + full_list = list(list_tests(full_suite)) + requested_list = list(list_tests(suite)) + requested_set = set(requested_list) + print("Test suite includes:", *sorted(requested_list), sep="\n") + print( + "\nTest suite excludes:", + *sorted(test for test in full_list if test not in requested_set), + sep="\n" + ) + + +def run_tests(suite: str, describe: bool = False) -> bool: + """Run the specified test suite. + + Args: + suite: one of "all", "dev", etc., specifying which suite to run + describe: if True, list all the test cases instead of running them.
+ + Returns: True iff success + """ if suite == "e2e": - suite = TestSuite(e2e_tests) + test_suite = TestSuite(e2e_tests) + elif suite == "api": + test_suite = TestSuite(api_tests) elif suite == "dev": - suite = TestSuite(indices_tests + other_tests + e2e_tests) + test_suite = TestSuite(other_tests + e2e_tests) elif suite in ("prod", "all"): - suite = loader.discover(os.path.dirname(__file__)) + test_suite = LOADER.discover(os.path.dirname(__file__)) elif suite == "other": - suite = TestSuite(other_tests) + test_suite = TestSuite(other_tests) else: LOGGER.error( "Sorry, you need to select a Test Suite to run, one of: " "api, dev, all (or prod), e2e, other" ) - sys.exit(1) + return False - runner = TextTestRunner(verbosity=3) - return runner.run(suite) + if describe: + describe_suite(test_suite) + return True + else: + runner = TextTestRunner(verbosity=3) + return runner.run(test_suite).wasSuccessful() if __name__ == "__main__": + describe = "--describe" in sys.argv + if describe: + sys.argv.remove("--describe") + try: - result = run_tests(sys.argv[1]) - if not result.wasSuccessful(): - raise Exception("Some tests failed. Please see log above.") + result = run_tests(sys.argv[1], describe) + if not result: + LOGGER.error("Some tests failed. Please see log above.") + sys.exit(1) except IndexError: - print("Please specify a test suite to run: i.e. 'dev' or 'all'") + LOGGER.error("Please specify a test suite to run: e.g. 'dev' or 'all'") + sys.exit(1) diff --git a/test/sound_swallower_stub.py b/test/sound_swallower_stub.py index 807a83eb..96c12b03 100644 --- a/test/sound_swallower_stub.py +++ b/test/sound_swallower_stub.py @@ -44,46 +44,31 @@ def SoundSwallowerStub(*segments): class SoundSwallowerDecoderStub: """Stub class so we don't really call the SoundSwallower decoder""" - class Segment: + class Seg: def __init__(self, segment_desc): """Init self from "word_id:start:end" description, e.g.
"p0s0w0:0:1".""" - self.word, s, e = segment_desc.split(":") - self.start_frame = int(s) - self.end_frame = int(e) + self.text, s, e = segment_desc.split(":") + self.start = float(s) / 100 + self.duration = (float(e) - float(s)) / 100 def __repr__(self): - return f'Segment(word="{self.word}", start_frame={self.start_frame}, end_frame={self.end_frame})' + return ( + f'Seg(text="{self.text}", start={self.start}, duration={self.duration})' + ) class Config: def __init__(self, *args): pass - def set_boolean(self, *args): - pass - - def set_string(self, *args): - pass - - def set_float(self, *args): - pass - - def set_int(self, *args): - pass - - def get_float(self, *args): - return 1.0 - - def get_int(self, name): - if name == "-frate": + def __getitem__(self, key): + if key == "frate": # Pretend the framerate is always 1000, so the stub times are all in ms return 1000 else: - return 1 + return "SPAM" def __init__(self, *outputs): - self._segments = [ - SoundSwallowerDecoderStub.Segment(segment) for segment in outputs - ] + self._segments = [SoundSwallowerDecoderStub.Seg(segment) for segment in outputs] def __call__(self, *args): return self @@ -97,6 +82,7 @@ def process_raw(self, *args, **kwargs): def end_utt(self): pass + @property def seg(self): return self._segments diff --git a/test/test_align_cli.py b/test/test_align_cli.py index 3f19e34a..c03856d6 100755 --- a/test/test_align_cli.py +++ b/test/test_align_cli.py @@ -1,10 +1,12 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python """ Unit test suite for the readalongs align CLI command """ import os +import pathlib +import tempfile from os.path import exists, join from unittest import main @@ -41,9 +43,11 @@ def test_invoke_align(self): "srt:TextGrid,eaf", # tests that we can give -o multiple values, separated by : or , "-l", "fra", + "--align-mode", + "auto", "--config", join(self.data_dir, "sample-config.json"), - join(self.data_dir, "ej-fra.txt"), + self.add_bom(join(self.data_dir, "ej-fra.txt")), join(self.data_dir, "ej-fra.m4a"), output, ], @@ -75,6 +79,10 @@ def test_invoke_align(self): exists(join(output, "tempfiles", "output.tokenized.xml")), "alignment with -s should have created tempfiles/output.tokenized.xml", ) + with open( + join(output, "tempfiles", "output.tokenized.xml"), "r", encoding="utf-8" + ) as f: + self.assertNotIn("\ufeff", f.read()) self.assertTrue( exists(join(output, "assets", "image-for-page1.jpg")), "alignment with image files should have copied image-for-page1.jpg to assets", @@ -82,6 +90,7 @@ def test_invoke_align(self): self.assertIn("image-for-page2.jpg is accessible ", results.stdout) os.unlink("image-for-page1.jpg") self.assertFalse(exists("image-for-page1.jpg")) + self.assertIn("Align mode strict succeeded for sequence 0.", results.stdout) # print(results.stdout) # Move the alignment output to compare with further down @@ -97,10 +106,12 @@ def test_invoke_align(self): [ "-o", "xhtml", + "--align-mode", + "moderate", "-s", "--config", join(self.data_dir, "sample-config.json"), - join(self.data_dir, "ej-fra-dna.xml"), + self.add_bom(join(self.data_dir, "ej-fra-dna.xml")), join(self.data_dir, "ej-fra.m4a"), output, ], @@ -120,6 +131,9 @@ def test_invoke_align(self): exists(join(output, "assets", "image-for-page1.jpg")), "image-for-page1.jpg was not on disk, cannot have been copied", ) + self.assertIn( + "Align mode moderate succeeded for sequence 0.", results_dna.stdout + ) # Functionally the same as self.assertTrue(filecmp.cmp(f1, f2)), but show where # the differences are if the files are not identical @@ 
-174,6 +188,8 @@ def test_align_with_package(self): output, "-o", "html", + "--config", + self.add_bom(self.data_dir / "sample-config.json"), ], ) # print(results_html.output) @@ -401,9 +417,11 @@ def test_infer_plain_text_or_xml(self): self.assertIn("No input language specified for plain text", results.output) # XML with guess by contents - infile3 = write_file( - join(self.tempdir, "infile3"), - "blah blah", + infile3 = self.add_bom( + write_file( + join(self.tempdir, "infile3"), + "blah blah", + ) ) with SoundSwallowerStub("word:0:1"): results = self.runner.invoke( @@ -447,19 +465,155 @@ def test_infer_plain_text_or_xml(self): self.assertNotEqual(results.exit_code, 0) self.assertIn("Error parsing XML", results.output) + def test_obsolete_switches(self): # Giving -i switch generates an obsolete-switch error message with SoundSwallowerStub("word:0:1"): results = self.runner.invoke( align, [ "-i", - infile5, + join(self.data_dir, "fra.txt"), join(self.data_dir, "noise.mp3"), join(self.tempdir, "outdir6"), ], ) self.assertNotEqual(results.exit_code, 0) - self.assertIn("The -i option is obsolete.", results.output) + self.assertIn("is obsolete.", results.output) + + # Giving --g2p-verbose switch generates an obsolete-switch error message + with SoundSwallowerStub("word:0:1"): + results = self.runner.invoke( + align, + [ + "--g2p-verbose", + join(self.data_dir, "fra.txt"), + join(self.data_dir, "noise.mp3"), + join(self.tempdir, "outdir7"), + ], + ) + self.assertNotEqual(results.exit_code, 0) + self.assertIn("is obsolete.", results.output) + + # Giving --g2p-fallback switch generates an obsolete-switch error message + with SoundSwallowerStub("word:0:1"): + results = self.runner.invoke( + align, + [ + "--g2p-fallback", + "fra:end:und", + join(self.data_dir, "fra.txt"), + join(self.data_dir, "noise.mp3"), + join(self.tempdir, "outdir8"), + ], + ) + self.assertNotEqual(results.exit_code, 0) + self.assertIn("is obsolete.", results.output) + + def test_oo_option(self): + """Exercise the hidden -oo / --output-orth option""" + with SoundSwallowerStub("word:0:1"): + results = self.runner.invoke( + align, + [ + "-oo", + "eng-arpabet", + join(self.data_dir, "ej-fra.xml"), + join(self.data_dir, "noise.mp3"), + join(self.tempdir, "outdir9"), + ], + ) + self.assertEqual(results.exit_code, 0) + + with SoundSwallowerStub("word:0:1"): + results = self.runner.invoke( + align, + [ + "-oo", + "not-an-alphabet", + join(self.data_dir, "ej-fra.xml"), + join(self.data_dir, "noise.mp3"), + join(self.tempdir, "outdir10"), + ], + ) + self.assertNotEqual(results.exit_code, 0) + self.assertIn("Could not g2p", results.output) + self.assertIn("not-an-alphabet", results.output) + + with SoundSwallowerStub("word:0:1"): + results = self.runner.invoke( + align, + [ + "-oo", + "dan-ipa", + join(self.data_dir, "ej-fra.xml"), + join(self.data_dir, "noise.mp3"), + join(self.tempdir, "outdir11"), + ], + ) + self.assertNotEqual(results.exit_code, 0) + self.assertIn("Could not g2p", results.output) + self.assertIn("no path", results.output) + + with SoundSwallowerStub("word:0:1"): + results = self.runner.invoke( + align, + [ + "-oo", + "dan-ipa", + "-l", + "eng", + join(self.data_dir, "fra.txt"), + join(self.data_dir, "noise.mp3"), + join(self.tempdir, "outdir12"), + ], + ) + self.assertNotEqual(results.exit_code, 0) + self.assertIn("Could not g2p", results.output) + self.assertIn('Cannot g2p "eng" to output orthography', results.output) + + def add_bom(self, filename): + """Create a temporary copy of filename with a BOM in it,
in self.tempdir""" + # We pepper calls to add_bom() around the test suite, to make sure all + # different kinds of input files are accepted with and without a BOM + output_file = tempfile.NamedTemporaryFile( + mode="wb", + dir=self.tempdir, + delete=False, + prefix="bom_", + suffix=os.path.basename(filename), + ) + output_file.write(b"\xef\xbb\xbf") + with open(filename, "rb") as file_binary: + output_file.write(file_binary.read()) + output_file.close() + return output_file.name + + def test_add_bom(self): + """Make sure add_bom does what we mean it to, i.e., test the test harness.""" + + def slurp_bin(filename): + with open(filename, "rb") as f: + return f.read() + + def slurp_text(filename, encoding): + with open(filename, "r", encoding=encoding) as f: + return f.read() + + base_file = write_file(self.tempdir / "add-bom-input.txt", "Random Text été") + bom_file = self.add_bom(base_file) + self.assertEqual( + slurp_text(base_file, "utf-8"), slurp_text(bom_file, "utf-8-sig") + ) + self.assertEqual( + slurp_text(bom_file, "utf-8"), "\ufeff" + slurp_text(base_file, "utf-8") + ) + self.assertNotEqual(slurp_bin(base_file), slurp_bin(bom_file)) + self.assertEqual(b"\xef\xbb\xbf" + slurp_bin(base_file), slurp_bin(bom_file)) + + bom_file_pathlib = self.add_bom(pathlib.Path(base_file)) + self.assertEqual( + slurp_text(base_file, "utf-8"), slurp_text(bom_file_pathlib, "utf-8-sig") + ) if __name__ == "__main__": diff --git a/test/test_anchors.py b/test/test_anchors.py index c54e0fd0..f8c6860f 100755 --- a/test/test_anchors.py +++ b/test/test_anchors.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python """Unit testing for the anchors functionality in readalongs align""" @@ -8,6 +8,7 @@ from basic_test_case import BasicTestCase from readalongs.align import align_audio +from readalongs.log import LOGGER class TestAnchors(BasicTestCase): @@ -70,6 +71,30 @@ def test_anchors_outer_too(self): f"{partial_wav_file} should not be empty", ) + def test_anchors_align_modes(self): + xml_with_anchors = """ + Bonjour. + + Ceci ne peut pas être aligné avec du bruit. + + + """ + xml_file = os.path.join(self.tempdir, "text-with-anchors.xml") + with open(xml_file, "wt", encoding="utf8") as f: + print(xml_with_anchors, file=f) + with self.assertLogs(LOGGER, level="INFO") as cm: + results = align_audio( + xml_file, + os.path.join(self.data_dir, "noise.mp3"), + ) + words = results["words"] + self.assertEqual(len(words), 10) + logger_output = "\n".join(cm.output) + self.assertIn("Align mode strict succeeded for sequence 0.", logger_output) + self.assertIn("Align mode strict failed for sequence 1.", logger_output) + self.assertIn("Align mode moderate failed for sequence 1.", logger_output) + self.assertIn("Align mode loose succeeded for sequence 1.", logger_output) + if __name__ == "__main__": main() diff --git a/test/test_api.py b/test/test_api.py new file mode 100755 index 00000000..4d08c6a7 --- /dev/null +++ b/test/test_api.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python + +""" +Test suite for the API way to call align +""" + +import os +from unittest import main + +import click +from basic_test_case import BasicTestCase +from sound_swallower_stub import SoundSwallowerStub + +import readalongs.api as api +from readalongs.log import LOGGER + + +class TestAlignApi(BasicTestCase): + """Test suite for the API way to call align()""" + + def test_call_align(self): + # We deliberately pass pathlib.Path objects as input, to make sure the + # API accepts them too. 
+ langs = ("fra",) # make sure language can be an iterable, not just a list. + with SoundSwallowerStub("t0b0d0p0s0w0:920:1520", "t0b0d0p0s1w0:1620:1690"): + (status, exception, log) = api.align( + self.data_dir / "ej-fra.txt", + self.data_dir / "ej-fra.m4a", + self.tempdir / "output", + langs, + output_formats=["html", "TextGrid", "srt"], + ) + self.assertEqual(status, 0) + self.assertTrue(exception is None) + self.assertIn("Words (<w>) not present; tokenizing", log) + expected_output_files = ( + "output.smil", + "output.xml", + "output.m4a", + "output.TextGrid", + "output_sentences.srt", + "output_words.srt", + "index.html", + "output.html", + ) + for f in expected_output_files: + self.assertTrue( + (self.tempdir / "output" / f).exists(), + f"successful alignment should have created {f}", + ) + self.assertEqual( + list(langs), + ["fra"], + "Make sure the API call doesn't modify my variables", + ) + + (status, exception, log) = api.align("", "", self.tempdir / "errors") + self.assertNotEqual(status, 0) + self.assertFalse(exception is None) + + def test_call_make_xml(self): + (status, exception, log) = api.make_xml( + self.data_dir / "ej-fra.txt", self.tempdir / "prepared.xml", ("fra", "eng") + ) + self.assertEqual(status, 0) + self.assertTrue(exception is None) + self.assertIn("Wrote ", log) + with open(self.tempdir / "prepared.xml") as f: + xml_text = f.read() + self.assertIn('xml:lang="fra" fallback-langs="eng,und"', xml_text) + + (status, exception, log) = api.make_xml( + self.data_dir / "ej-fra.txt", + self.tempdir / "bad.xml", + ("fra", "not-a-lang"), + ) + self.assertNotEqual(status, 0) + self.assertTrue(isinstance(exception, click.BadParameter)) + + (status, exception, log) = api.make_xml( + self.data_dir / "file-not-found.txt", self.tempdir / "none.xml", ("fra",) + ) + self.assertNotEqual(status, 0) + self.assertTrue(isinstance(exception, click.UsageError)) + + def test_deprecated_prepare(self): + with self.assertLogs(LOGGER, level="WARNING") as cm: + api.prepare(self.data_dir / "ej-fra.txt", os.devnull, ("fra",)) + self.assertIn("deprecated", "\n".join(cm.output)) + + +if __name__ == "__main__": + main() diff --git a/test/test_audio.py b/test/test_audio.py index 8e27e1ff..6c8b1001 100755 --- a/test/test_audio.py +++ b/test/test_audio.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python """Test suite for various audio contents handling methods""" @@ -48,9 +48,14 @@ def align(self, input_text_path, input_audio_path, output_path, flags): def test_mute_section(self): """Should mute section of audio""" + max_before = self.audio_segment[1000:2000].max muted_segment = mute_section(self.audio_segment, 1000, 2000) muted_section = muted_segment[1000:2000] - self.assertLessEqual(muted_section.max, 1) + # This worked with pydub 0.23.1, but it does not work with 0.25.1 + # self.assertLessEqual(muted_section.max, 1) + # Muting applies a gain of -120, so the result is not necessarily 0, + # it's just much smaller.
+ self.assertLessEqual(muted_section.max, max_before / 1000) def test_remove_section(self): """Should remove section of audio""" @@ -84,6 +89,8 @@ def test_align_sample(self): "pip install --force-reinstall --upgrade might be required " "if dependencies changed.", ) + # Make sure ss logs are disabled + self.assertNotIn("Current configuration", process.stderr) def test_align_removed(self): """Try aligning section with removed audio""" @@ -95,7 +102,7 @@ # Align input_text_path = os.path.join(self.data_dir, "audio_sample.txt") input_audio_path = audio_output_path - flags = ["-l", "eng"] + flags = ["-l", "eng", "--debug-aligner"] output_path = os.path.join(self.tempdir, "output_removed") process = self.align(input_text_path, input_audio_path, output_path, flags) if process.returncode != 0: @@ -109,6 +116,8 @@ "pip install --force-reinstall --upgrade might be required " "if dependencies changed.", ) + # Make sure ss logs are enabled + self.assertIn("Current configuration", process.stderr) def test_align_muted(self): """Try aligning section with muted audio""" diff --git a/test/test_config.py b/test/test_config.py index a724f773..9ef01859 100755 --- a/test/test_config.py +++ b/test/test_config.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python """Test suite for loading the config.json configuration file for readalongs align""" @@ -55,7 +55,7 @@ def test_arbitrary_xml(self): # bad xml raises lxml.etree.XMLSyntaxError with self.assertRaises(etree.XMLSyntaxError): new_xml = add_supplementary_xml( - self.xml, {"xml": [{"xpath": "//div[1]", "value": "bloop"}]}, + self.xml, {"xml": [{"xpath": "//div[1]", "value": "bloop"}]} ) # if xpath isn't valid, log warning diff --git a/test/test_dna_text.py b/test/test_dna_text.py index 25127398..a7426ec0 100755 --- a/test/test_dna_text.py +++ b/test/test_dna_text.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python """Test handling of DNA text in tokenization""" diff --git a/test/test_dna_utils.py b/test/test_dna_utils.py index 7218a1dd..4022a6fb 100755 --- a/test/test_dna_utils.py +++ b/test/test_dna_utils.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python """Test suite for DNA segment manipulation methods""" @@ -129,7 +129,7 @@ def test_segment_intersection(self): self.assertEqual( segment_intersection( - segments_from_pairs((10, 30)), segments_from_pairs((19, 19)), + segments_from_pairs((10, 30)), segments_from_pairs((19, 19)) ), segments_from_pairs((19, 19)), ) diff --git a/test/test_force_align.py b/test/test_force_align.py index 4793fe8b..15480b66 100755 --- a/test/test_force_align.py +++ b/test/test_force_align.py @@ -1,16 +1,25 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python """ Test force-alignment with SoundSwallower FSG search from Python API """ import os +import shutil import unittest +import wave +from tempfile import TemporaryDirectory from basic_test_case import BasicTestCase from lxml import etree +from soundswallower import get_model_path -from readalongs.align import align_audio, convert_to_xhtml, create_input_tei +from readalongs.align import ( + align_audio, + convert_to_xhtml, + create_input_tei, + get_word_texts_and_sentences, +) from readalongs.log import LOGGER from readalongs.portable_tempfile import PortableNamedTemporaryFile from readalongs.text.util import load_txt, save_xml @@ -23,7 +32,7 @@ def test_align(self): """Basic alignment test case with XML input""" xml_path = os.path.join(self.data_dir, "ej-fra.xml") wav_path =
os.path.join(self.data_dir, "ej-fra.m4a") - results = align_audio(xml_path, wav_path, unit="w") + results = align_audio(xml_path, wav_path, unit="w", debug_aligner=True) # Verify that the same IDs are in the output converted_path = os.path.join(self.data_dir, "ej-fra-converted.xml") @@ -52,6 +61,113 @@ def test_align_text(self): for w, xw in zip(words, xml_words): self.assertEqual(xw.attrib["id"], w["id"]) + # White-box testing to make sure srt, TextGrid and vtt output will have the + # sentences collected correctly. + words, sentences = get_word_texts_and_sentences( + results["words"], results["tokenized"] + ) + self.assertEqual(len(sentences), 7) + self.assertEqual(len(words), 99) + + def make_element(tag, text="", tail=""): + """Convenient Element constructor wrapper""" + el = etree.Element(tag) + el.text = text + el.tail = tail + return el + + # Do some word doctoring to make sure sub-word units don't cause trouble + # This might be nicer in a different test case, but I want to reuse + # results from the call above, so I'm glomming it on here... + xml = results["tokenized"] + for i, word_el in enumerate(xml.xpath(".//w")): + if i == 1: + # Modify the <w>'s own text + word_el.text += " stuff" + elif i == 2: + # Whole text in one <subw> element + word_el.text = "" + word_el.append(make_element("subw", "subwordtext")) + elif i == 3: + # <w> with three clean <syl> elements + word_el.text = "" + for i in range(3): + word_el.append(make_element("syl", "syl;")) + elif i == 4: + # Messy <w> is still valid structure + word_el.text = "head text;" + word_el.append(make_element("syl", "syllable text;", "syl tail;")) + word_el.tail = "tail from the word itself is ignored;" + # etree.dump(word_el) + elif i == 5: + # Nested sub elements + word_el.append(make_element("syl", "syl;", "tail;")) + word_el[0].append(make_element("subsyl", "sub;")) + word_el.append(make_element("syl", "another syl;")) + break + _, sentences = get_word_texts_and_sentences( + results["words"], results["tokenized"] + ) + self.assertEqual( + [w["text"] for w in sentences[1]], + [ + "Je stuff", + "subwordtext", + "syl;syl;syl;", + "head text;syllable text;syl tail;", + "Joanissyl;sub;tail;another syl;", + ], + ) + + def test_align_switch_am(self): + """Alignment test case with an alternate acoustic model and custom + noise dictionary.""" + xml_path = os.path.join(self.data_dir, "ej-fra.xml") + wav_path = os.path.join(self.data_dir, "ej-fra.m4a") + # Try with some extra stuff in the noisedict + with TemporaryDirectory(prefix="readalongs_am_") as tempdir: + custom_am_path = os.path.join(tempdir, "en-us") + shutil.copytree(get_model_path("en-us"), custom_am_path) + with open(os.path.join(custom_am_path, "noisedict"), "at") as fh: + fh.write(";; here is a comment\n") + fh.write("[BOGUS] SIL\n") + results = align_audio( + xml_path, wav_path, unit="w", config={"acoustic_model": custom_am_path} + ) + # Try with no noisedict + os.remove(os.path.join(custom_am_path, "noisedict")) + results = align_audio( + xml_path, wav_path, unit="w", config={"acoustic_model": custom_am_path} + ) + # Verify that the same IDs are in the output + converted_path = os.path.join(self.data_dir, "ej-fra-converted.xml") + xml = etree.parse(converted_path).getroot() + words = results["words"] + xml_words = xml.xpath(".//w") + self.assertEqual(len(words), len(xml_words)) + for w, xw in zip(words, xml_words): + self.assertEqual(xw.attrib["id"], w["id"]) + + def test_align_fail(self): + """Alignment test case with bad audio that should fail.""" + xml_path = os.path.join(self.data_dir, "ej-fra.xml") + with
PortableNamedTemporaryFile(suffix=".wav") as tf: + with wave.open(tf, "wb") as writer: + writer.setnchannels(1) + writer.setsampwidth(2) + writer.setframerate(16000) + writer.writeframes(b"\x00\x00") + with self.assertRaises(RuntimeError): + _ = align_audio(xml_path, tf.name, unit="w") + + def test_bad_align_mode(self): + with self.assertRaises(AssertionError): + _ = align_audio( + os.path.join(self.data_dir, "ej-fra.xml"), + os.path.join(self.data_dir, "noise.mp3"), + alignment_mode="invalid-mode", + ) + class TestXHTML(BasicTestCase): """Test converting the output to xhtml""" @@ -66,7 +182,7 @@ def test_convert(self): txt = load_txt(tf.name) self.maxDiff = None self.assertEqual( - txt, load_txt(os.path.join(self.data_dir, "ej-fra-converted.xhtml")), + txt, load_txt(os.path.join(self.data_dir, "ej-fra-converted.xhtml")) ) diff --git a/test/test_g2p_cli.py b/test/test_g2p_cli.py index 366f2888..48a7c2bc 100755 --- a/test/test_g2p_cli.py +++ b/test/test_g2p_cli.py @@ -1,8 +1,9 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python """Test suite for the readalongs g2p CLI command""" import os +import re from unittest import main from basic_test_case import BasicTestCase @@ -10,11 +11,21 @@ from sound_swallower_stub import SoundSwallowerStub from readalongs.align import align_audio -from readalongs.cli import align, g2p, prepare, tokenize +from readalongs.cli import align, g2p, make_xml, tokenize from readalongs.log import LOGGER from readalongs.text.convert_xml import convert_xml +def run_convert_xml(input_string): + """wrap convert_xml to make unit testing easier""" + return etree.tounicode(convert_xml(etree.fromstring(input_string))[0]) + + +def two_xml_elements(xml_text): + """Extract the opening part of the leading two XML elements in xml_text""" + return xml_text[: 1 + xml_text.find(">", 1 + xml_text.find(">"))] + + class TestG2pCli(BasicTestCase): """Test suite for the readalongs g2p CLI command""" @@ -70,15 +81,31 @@ def test_mixed_langs(self): f"output {g2p_file} and reference {ref_file} differ.", ) - # Write text to a temp file, pass it through prepare -l lang, and then tokenize, + def test_invoke_with_obsolete_switches(self): + """Using obsolete options should yield a helpful error message""" + + input_file = os.path.join(self.data_dir, "fra-tokenized.xml") + g2p_file = os.path.join(self.tempdir, "obsolete1.xml") + results = self.runner.invoke( + g2p, ["--g2p-fallback", "fra:und", input_file, g2p_file] + ) + self.assertNotEqual(results.exit_code, 0) + self.assertIn("is obsolete", results.output) + + g2p_file = os.path.join(self.tempdir, "obsolete2.xml") + results = self.runner.invoke(g2p, ["--g2p-verbose", input_file, g2p_file]) + self.assertNotEqual(results.exit_code, 0) + self.assertIn("is obsolete", results.output) + + # Write text to a temp file, pass it through make-xml -l lang, and then tokenize, # saving the final results into filename. # filename is assumed to be inside self.tempdir, so we count on tearDown() to clean up. 
- def write_prepare_tokenize(self, text, lang, filename): + def write_make_xml_tokenize(self, text, lang, filename): """Create the input file for some test cases in this suite""" with open(filename + ".input.txt", "w", encoding="utf8") as f: print(text, file=f) self.runner.invoke( - prepare, + make_xml, [ "-l", lang, @@ -92,7 +119,7 @@ def write_prepare_tokenize(self, text, lang, filename): def test_english_oov(self): """readalongs g2p should handle English OOVs correctly""" tok_file = os.path.join(self.tempdir, "tok.xml") - self.write_prepare_tokenize("This is a froobnelicious OOV.", "eng", tok_file) + self.write_make_xml_tokenize("This is a froobnelicious OOV.", "eng", tok_file) results = self.runner.invoke(g2p, [tok_file]) if self.show_invoke_output: print( @@ -106,7 +133,7 @@ def test_english_oov(self): # with a fall back to und, it works tok_file_with_fallback = os.path.join(self.tempdir, "fallback.xml") - self.write_prepare_tokenize( + self.write_make_xml_tokenize( "This is a froobnelicious OOV.", "eng:und", tok_file_with_fallback ) results = self.runner.invoke(g2p, [tok_file_with_fallback, "-"]) @@ -122,7 +149,7 @@ def test_french_oov(self): """readalongs g2p should handle French OOVs correctly""" tok_file = os.path.join(self.tempdir, "tok.xml") g2p_file = os.path.join(self.tempdir, "g2p.xml") - self.write_prepare_tokenize( + self.write_make_xml_tokenize( "Le ñ n'est pas dans l'alphabet français.", "fra", tok_file ) results = self.runner.invoke(g2p, [tok_file, g2p_file]) @@ -137,7 +164,7 @@ def test_french_oov(self): # with a fall back to und, it works tok_file2 = os.path.join(self.tempdir, "tok2.xml") - self.write_prepare_tokenize( + self.write_make_xml_tokenize( "Le ñ n'est pas dans l'alphabet français.", "fra:und", tok_file2 ) g2p_file2 = os.path.join(self.tempdir, "g2p-fallback.xml") @@ -154,7 +181,7 @@ def test_three_way_fallback(self): """readalongs g2p --g2p-fallback with multi-step cascades""" tok_file = os.path.join(self.tempdir, "text.tokenized.xml") g2p_file = os.path.join(self.tempdir, "text.g2p.xml") - self.write_prepare_tokenize( + self.write_make_xml_tokenize( "In French été works but Nunavut ᓄᓇᕗᑦ does not.", "eng:fra:iku", tok_file ) # Here we also test generating the output filename from the input filename @@ -172,7 +199,7 @@ def test_three_way_fallback(self): # Run with verbose output and look for the warning messages results = self.runner.invoke( - g2p, ["--g2p-verbose", tok_file, g2p_file + "verbose"], + g2p, ["--debug-g2p", tok_file, g2p_file + "verbose"] ) if self.show_invoke_output: print( @@ -186,7 +213,7 @@ def test_three_way_fallback(self): # this text also works with "und", now that we use unidecode tok_file2 = os.path.join(self.tempdir, "text.tokenized2.xml") - self.write_prepare_tokenize( + self.write_make_xml_tokenize( "In French été works but Nunavut ᓄᓇᕗᑦ does not.", "eng:und", tok_file2 ) results = self.runner.invoke(g2p, [tok_file2, "-"]) @@ -214,6 +241,7 @@ def test_align_with_error(self): ) self.assertNotEqual(results.exit_code, 0) self.assertIn("could not be g2p", results.output) + self.assertNotIn("Number of aligned segments", results.output) with SoundSwallowerStub("t0b0d0p0s0w0:920:1620", "t0b0d0p0s1w0:1620:1690"): results = self.runner.invoke( @@ -236,8 +264,8 @@ def test_align_with_error(self): ) self.assertIn("Trying fallback: fra", results.output) self.assertIn("Trying fallback: iku", results.output) - # We get the found segments printed only if g2p succeeded: - self.assertIn("Segment: t0b0d0p0s0w0", results.output) + 
self.assertNotIn("could not be g2p", results.output) + self.assertIn("Number of aligned segments", results.output) def test_with_stdin(self): """readalongs g2p running with stdin as input""" @@ -287,10 +315,6 @@ def test_align_with_preg2p(self): self.assertIn("HH EH Y", dict_file) # "Hej" in dan self.assertIn("D G IY T UW P IY D", dict_file) # pre-g2p'd OOV - def run_convert_xml(self, input_string): - """wrap convert_xml to make unit testing easier""" - return etree.tounicode(convert_xml(etree.fromstring(input_string))[0]) - def test_convert_xml(self): """unit testing for readalongs.text.convert_xml.convert_xml() @@ -298,12 +322,12 @@ def test_convert_xml(self): It's not very well named, but it still needs unit testing. :) """ self.assertEqual( - self.run_convert_xml("wordnot word"), + run_convert_xml("wordnot word"), 'wordnot word', ) self.assertEqual( - self.run_convert_xml( + run_convert_xml( 'Patrickxtła̱n' ), 'Patrick' @@ -311,10 +335,88 @@ def test_convert_xml(self): ) self.assertEqual( - self.run_convert_xml('Patrickxtła̱n'), + run_convert_xml('Patrickxtła̱n'), 'Patrickxtła̱n', ) + def test_convert_xml_with_newlines(self): + """Newlines inside words are weird, but they should not cause errors""" + + def compact_arpabet(xml_string: str) -> str: + etree_root = etree.fromstring(xml_string) + arpabet = etree_root[0].attrib["ARPABET"] + return re.sub(r"\s+", " ", arpabet) + + converted_1 = run_convert_xml( + """ + first part of the word + second part of the word + """ + ) + converted_2 = run_convert_xml( + "first part of the wordsecond part of the word" + ) + self.assertEqual(compact_arpabet(converted_1), compact_arpabet(converted_2)) + + def test_convert_xml_subwords(self): + """Unit testing for reintroducing subword units""" + self.assertEqual( + run_convert_xml( + 'Patrickxtła̱n' + ), + 'Patrick' + 'xtła̱n', + ) + + self.assertEqual( + run_convert_xml( + 'fooPatrickbarxtła̱nbaz' + ), + '' + 'fooPatrickbarxtła̱nbaz', + ) + + converted_by_syllable = run_convert_xml( + 'abcdefghi' + ) + converted_as_a_whole = run_convert_xml('abcdefghi') + self.assertEqual( + two_xml_elements(converted_by_syllable), + two_xml_elements(converted_as_a_whole), + ) + + moh_eg_with_highlights = "tatiatkèn:sehkwe'" + moh_eg_merged = "tatiatkèn:sehkwe'" + self.assertEqual(two_xml_elements(moh_eg_merged), "") + self.assertEqual( + two_xml_elements(run_convert_xml(moh_eg_with_highlights)), + two_xml_elements(run_convert_xml(moh_eg_merged)), + ) + + moh_example_input_full = """ + + + + tati + atkèn:se + hkwe' + + + """ + _ = run_convert_xml(moh_example_input_full) + + example_with_fallback_lang = """ + + cecinot_really_iku + """ + with self.assertLogs(LOGGER, level="WARNING") as cm: + result = run_convert_xml(example_with_fallback_lang) + self.assertIn("S AH S IY not_really_iku", result) + logger_output = "\n".join(cm.output) + self.assertIn( + 'No valid g2p conversion found for "not_really_iku"', logger_output + ) + def test_convert_xml_invalid(self): """test readalongs.text.convert_xml.convert_xml() with invalid input""" xml = etree.fromstring('valid') @@ -341,11 +443,12 @@ def test_invalid_langs_in_xml(self): """ ) with self.assertLogs(LOGGER, level="WARNING") as cm: - c_xml, valid = convert_xml(xml) + c_xml, valid = convert_xml(xml, verbose_warnings=True) self.assertFalse(valid) logger_output = "\n".join(cm.output) - self.assertIn('"foo": invalid language code', logger_output) - self.assertIn('"crx-syl": no path to "eng-arpabet"', logger_output) + self.assertIn("No lang", logger_output) + self.assertIn("foo", 
logger_output) + self.assertIn('no path from "crx-syl"', logger_output) if __name__ == "__main__": diff --git a/test/test_indices.py b/test/test_indices.py deleted file mode 100755 index 59c85c64..00000000 --- a/test/test_indices.py +++ /dev/null @@ -1,94 +0,0 @@ -#!/usr/bin/env python3 - -"""Test suite for handling g2p indices""" - -from unittest import TestCase, main - -from g2p import make_g2p -from g2p.mappings import Mapping -from g2p.transducer import Transducer - -from readalongs.log import LOGGER - - -class TestIndices(TestCase): - """Test suite for handling g2p indices""" - - def test_basic_composition(self): - """Indices mapped through a two-step basic composition""" - mapping = Mapping([{"in": "a", "out": "b"}]) - transducer = Transducer(mapping) - tg = transducer("abba") - self.assertEqual(tg.output_string, "bbbb") - self.assertEqual(tg.edges, [(0, 0), (1, 1), (2, 2), (3, 3)]) - - def test_tiered_composition(self): - """Indices mapped through a more complex, three-step composition""" - transducer = make_g2p("dan", "eng-arpabet") - tg = transducer("hej") - self.assertEqual(tg.output_string, "HH EH Y ") - self.assertEqual( - tg.edges, - [ - [(0, 0), (1, 1), (2, 2)], - [(0, 0), (1, 1), (2, 2)], - [(0, 0), (0, 1), (0, 2), (1, 3), (1, 4), (1, 5), (2, 6), (2, 7)], - ], - ) - self.assertEqual( - tg.pretty_edges(), - [ - [["h", "h"], ["e", "ɛ"], ["j", "j"]], - [["h", "h"], ["ɛ", "ɛ"], ["j", "j"]], - [ - ["h", "H"], - ["h", "H"], - ["h", " "], - ["ɛ", "E"], - ["ɛ", "H"], - ["ɛ", " "], - ["j", "Y"], - ["j", " "], - ], - ], - ) - - def test_composition_with_none(self): - transducer = make_g2p("ctp", "eng-arpabet") - tg = transducer("Qne\u1D2C") - self.assertEqual(tg.output_string, "HH N EY ") - self.assertEqual( - tg.edges, - [ - [(0, 0), (1, 1), (2, 2), (3, None)], - [(0, 0), (1, 1), (2, 2), (2, 3)], - [(0, 0), (0, 1), (0, 2), (1, 3), (1, 4), (2, 5), (3, 6), (3, 7)], - ], - ) - self.assertEqual( - tg.pretty_edges(), - [ - [["q", "ʔ"], ["n", "n"], ["e", "e"], ["ᴬ", None]], - [["ʔ", "ʔ"], ["n", "n"], ["e", "e"], ["e", "ː"]], - [ - ["ʔ", "H"], - ["ʔ", "H"], - ["ʔ", " "], - ["n", "N"], - ["n", " "], - ["e", "E"], - ["ː", "Y"], - ["ː", " "], - ], - ], - ) - - def test_fra(self): - transducer = make_g2p("fra", "eng-arpabet") - tg = transducer("mais") - self.assertEqual(tg.output_string, "M EH ") - - -if __name__ == "__main__": - LOGGER.setLevel("DEBUG") - main() diff --git a/test/test_prepare_cli.py b/test/test_make_xml_cli.py similarity index 67% rename from test/test_prepare_cli.py rename to test/test_make_xml_cli.py index d5b09975..7cb59f75 100755 --- a/test/test_prepare_cli.py +++ b/test/test_make_xml_cli.py @@ -1,6 +1,6 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python -"""Test suite for the readalongs prepare CLI command""" +"""Test suite for the readalongs make_xml CLI command""" import io import os @@ -10,13 +10,13 @@ from basic_test_case import BasicTestCase -from readalongs.align import create_input_tei -from readalongs.cli import align, prepare +from readalongs.align import create_input_tei, create_tei_from_text +from readalongs.cli import align, make_xml from readalongs.log import LOGGER -class TestPrepareCli(BasicTestCase): - """Test suite for the readalongs prepare CLI command""" +class TestMakeXMLCli(BasicTestCase): + """Test suite for the readalongs make-xml CLI command""" def setUp(self): super().setUp() @@ -25,56 +25,63 @@ def setUp(self): pass def test_invoke_prepare(self): - """Basic usage of readalongs prepare""" + """Basic usage of deprecated readalongs prepare""" results 
= self.runner.invoke( - prepare, + make_xml, ["-l", "atj", "-d", self.empty_file, os.path.join(self.tempdir, "delme")], ) self.assertEqual(results.exit_code, 0) - self.assertRegex(results.stdout, "Running readalongs prepare") - # print('Prepare.stdout: {}'.format(results.stdout)) + + def test_invoke_make_xml(self): + """Basic usage of readalongs make-xml""" + results = self.runner.invoke( + make_xml, + ["-l", "atj", "-d", self.empty_file, os.path.join(self.tempdir, "delme")], + ) + self.assertEqual(results.exit_code, 0) + self.assertRegex(results.stdout, "Running readalongs make-xml") def test_no_lang(self): - """Error case: readalongs prepare without the mandatory -l switch""" + """Error case: readalongs make-xml without the mandatory -l switch""" results = self.runner.invoke( - prepare, [self.empty_file, self.empty_file + ".xml"] + make_xml, [self.empty_file, self.empty_file + ".xml"] ) self.assertNotEqual(results.exit_code, 0) self.assertRegex(results.stdout, "Missing.*language") def test_inputfile_not_exist(self): """Error case: input file does not exist""" - results = self.runner.invoke(prepare, "-l atj /file/does/not/exist delme") + results = self.runner.invoke(make_xml, "-l atj /file/does/not/exist delme") self.assertNotEqual(results.exit_code, 0) self.assertRegex(results.stdout, "No such file or directory") def test_outputfile_exists(self): - """Existing output file should not be overwritten by readalongs prepare by default""" + """Existing output file should not be overwritten by readalongs make-xml by default""" results = self.runner.invoke( - prepare, + make_xml, ["-l", "atj", self.empty_file, os.path.join(self.tempdir, "exists")], ) results = self.runner.invoke( - prepare, + make_xml, ["-l", "atj", self.empty_file, os.path.join(self.tempdir, "exists")], ) self.assertNotEqual(results.exit_code, 0) self.assertRegex(results.stdout, "exists.*overwrite") def test_output_exists(self): - """Make sure readalongs prepare create the expected output file""" + """Make sure readalongs make-xml creates the expected output file""" xmlfile = os.path.join(self.tempdir, "fra.xml") results = self.runner.invoke( - prepare, ["-l", "fra", os.path.join(self.data_dir, "fra.txt"), xmlfile] + make_xml, ["-l", "fra", os.path.join(self.data_dir, "fra.txt"), xmlfile] ) self.assertEqual(results.exit_code, 0) self.assertTrue(os.path.exists(xmlfile), "output xmlfile did not get created") def test_output_correct(self): - """Make sure the contents of readalongs prepare's output file is correct.""" + """Make sure the contents of readalongs make-xml's output file are correct.""" input_file = os.path.join(self.data_dir, "fra.txt") xml_file = os.path.join(self.tempdir, "fra.xml") - results = self.runner.invoke(prepare, ["-l", "fra", input_file, xml_file]) + results = self.runner.invoke(make_xml, ["-l", "fra", input_file, xml_file]) self.assertEqual(results.exit_code, 0) ref_file = os.path.join(self.data_dir, "fra-prepared.xml") @@ -89,8 +96,8 @@ def test_input_is_stdin(self): - """Validate that readalongs prepare can use stdin as input""" - results = self.runner.invoke(prepare, "-l fra -", input="Ceci est un test.") + """Validate that readalongs make-xml can use stdin as input""" + results = self.runner.invoke(make_xml, "-l fra -", input="Ceci est un test.") # LOGGER.warning("Output: {}".format(results.output)) # LOGGER.warning("Exception: {}".format(results.exception)) self.assertEqual(results.exit_code, 0) self.assertIn('',
results.output) - results = self.runner.invoke(prepare, ["-l", "fra,iku:und", input_file, "-"]) + results = self.runner.invoke(make_xml, ["-l", "fra,iku:und", input_file, "-"]) self.assertEqual(results.exit_code, 0) self.assertIn('', results.output) results = self.runner.invoke( - prepare, ["-l", "fra:iku", "-l", "und", input_file, "-"] + make_xml, ["-l", "fra:iku", "-l", "und", input_file, "-"] ) self.assertEqual(results.exit_code, 0) self.assertIn('', results.output) - def test_prepare_invalid_lang(self): + def test_make_xml_invalid_lang(self): input_file = os.path.join(self.data_dir, "fra.txt") results = self.runner.invoke( - prepare, ["-l", "fra:notalang:und", input_file, "-"] + make_xml, ["-l", "fra:notalang:und", input_file, "-"] ) self.assertNotEqual(results.exit_code, 0) self.assertRegex(results.output, r"Invalid value.*'notalang'") - def test_prepare_invalid_utf8_input(self): + def test_make_xml_invalid_utf8_input(self): noise_file = os.path.join(self.data_dir, "noise.mp3") # Read noise.mp3 as if it was utf8 text, via create_input_tei(input_file_handle) - results = self.runner.invoke(prepare, ["-l", "fra", noise_file, "-"]) + results = self.runner.invoke(make_xml, ["-l", "fra", noise_file, "-"]) self.assertNotEqual(results.exit_code, 0) self.assertIn("provide a correctly encoded utf-8", results.output) # Read noise.mp3 as if it was utf8 text, via create_input_tei(input_file_name) results = self.runner.invoke( - prepare, ["-l", "fra", noise_file, os.path.join(self.tempdir, "noise.xml")] + make_xml, ["-l", "fra", noise_file, os.path.join(self.tempdir, "noise.xml")] ) self.assertNotEqual(results.exit_code, 0) self.assertIn("provide a correctly encoded utf-8", results.output) @@ -231,6 +238,19 @@ def test_prepare_invalid_utf8_input(self): self.assertNotEqual(results.exit_code, 0) self.assertIn("provide a correctly encoded utf-8", results.output) + def test_blank_lines_stripped(self): + """Blank lines for paragraph and page breaks are allowed to have whitespace""" + input_text_with_spaces = "Ceci est un test\n \nParagraphe\n\t \n \nPage\n" + input_text_stripped = "Ceci est un test\n\nParagraphe\n\n\nPage\n" + + def text2lines(text: str): + return io.StringIO(text).readlines() + + self.assertEqual( + create_tei_from_text(text2lines(input_text_with_spaces), ["fra"]), + create_tei_from_text(text2lines(input_text_stripped), ["fra"]), + ) + if __name__ == "__main__": main() diff --git a/test/test_misc.py b/test/test_misc.py index 21a96347..76fd13a9 100755 --- a/test/test_misc.py +++ b/test/test_misc.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python """Test suite for misc stuff that don't need their own stand-alone suite""" @@ -10,8 +10,13 @@ from test_dna_utils import segments_from_pairs from readalongs.align import split_silences -from readalongs.text.util import get_attrib_recursive, get_lang_attrib, parse_time -from readalongs.util import JoinerCallback +from readalongs.text.util import ( + get_attrib_recursive, + get_lang_attrib, + get_word_text, + parse_time, +) +from readalongs.util import JoinerCallbackForClick class TestMisc(TestCase): @@ -154,13 +159,35 @@ def test_get_attrib_recursive(self): # get_attrib_recursive() --EJJ Nov 2021 def test_joiner_callback(self): - cb = JoinerCallback(iter("qwer")) # iterable over four characters + cb = JoinerCallbackForClick(iter("qwer")) # iterable over four characters self.assertEqual(cb(None, None, ["e:r"]), ["e", "r"]) self.assertEqual(cb(None, None, ["q,w"]), ["q", "w"]) with self.assertRaises(click.BadParameter): cb(None, 
None, ["q:e", "a,w"]) self.assertEqual(cb(None, None, ["r:q", "w"]), ["r", "q", "w"]) + def test_get_word_text(self): + self.assertEqual( + get_word_text(etree.fromstring("<w>basicword</w>")), + "basicword", + ) + self.assertEqual( + get_word_text(etree.fromstring("<w><subw>subwcase</subw></w>")), + "subwcase", + ) + self.assertEqual( + get_word_text(etree.fromstring("<w><syl>syl1</syl><syl>syl2</syl></w>")), + "syl1syl2", + ) + self.assertEqual( + get_word_text(etree.fromstring("<w>text<subw>sub</subw>tail</w>")), + "textsubtail", + ) + self.assertEqual( + get_word_text(etree.fromstring("<w>a<syl>b<subsyl>c</subsyl></syl>d</w>")), + "abcd", + ) + if __name__ == "__main__": main() diff --git a/test/test_package_urls.py b/test/test_package_urls.py index 4f76598f..d27df58a 100755 --- a/test/test_package_urls.py +++ b/test/test_package_urls.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python from unittest import main diff --git a/test/test_silence.py b/test/test_silence.py index 212ace8a..9b6691e2 100755 --- a/test/test_silence.py +++ b/test/test_silence.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python """Test suite for inserting silences into a readalong""" diff --git a/test/test_smil.py b/test/test_smil.py new file mode 100644 index 00000000..4217b7c3 --- /dev/null +++ b/test/test_smil.py @@ -0,0 +1,108 @@ +#!/usr/bin/env python + +""" +Unit test suite for the smil writing and parsing utilities +""" + +from textwrap import dedent +from unittest import main + +from basic_test_case import BasicTestCase + +from readalongs.text.make_smil import make_smil, parse_smil + + +class TestSmilUtilities(BasicTestCase): + """Unit test suite for the smil writing and parsing utilities""" + + def setUp(self): + super().setUp() + self.words = [ + {"id": "w1", "start": 0.01, "end": 0.75}, + {"id": "w2", "start": 0.8, "end": 1.04}, + # Make one of the IDs contain a non-ASCII UTF-8 character, to test that it is handled correctly.
+ {"id": "wé3", "start": 1.2, "end": 1.33}, + ] + self.smil = dedent( + """\ + + + + + + + + + + + + + + """ + ) + + def test_make_smil(self): + text_path = "my_text_path" + audio_path = "my_audio_path" + smil = make_smil(text_path, audio_path, self.words) + self.assertEqual(smil, self.smil) + + def test_parse_smil(self): + words = parse_smil(self.smil) + self.assertEqual(words, self.words) + + def test_parse_bad_smil(self): + with self.assertRaises(ValueError): + _ = parse_smil("this is not XML") + + missing_id = dedent( + """\ + + + + + + + + """ + ) + with self.assertRaises(ValueError): + _ = parse_smil(missing_id) + + missing_clip_end = dedent( + """\ + + + + + + + + """ + ) + with self.assertRaises(ValueError): + _ = parse_smil(missing_clip_end) + + bad_float = dedent( + """\ + + + + + + + + """ + ) + with self.assertRaises(ValueError): + _ = parse_smil(bad_float) + + +if __name__ == "__main__": + main() diff --git a/test/test_temp_file.py b/test/test_temp_file.py index 0f38924b..e468f6ac 100755 --- a/test/test_temp_file.py +++ b/test/test_temp_file.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python """Test PortableNamedTemporaryFile class""" diff --git a/test/test_tokenize_cli.py b/test/test_tokenize_cli.py index 87497cc7..59e6f58f 100755 --- a/test/test_tokenize_cli.py +++ b/test/test_tokenize_cli.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python """Test suite for readalongs tokenize""" @@ -8,7 +8,7 @@ from basic_test_case import BasicTestCase -from readalongs.cli import prepare, tokenize +from readalongs.cli import make_xml, tokenize # from readalongs.log import LOGGER @@ -17,12 +17,13 @@ class TestTokenizeCli(BasicTestCase): """Test suite for the readalongs tokenize CLI command""" def setUp(self): - """setUp() creates self.tempdir and prepares an XML file for use in other tests""" + """setUp() creates self.tempdir and makes an XML file for use in other tests""" super().setUp() self.xmlfile = os.path.join(self.tempdir, "fra.xml") _ = self.runner.invoke( - prepare, ["-l", "fra", os.path.join(self.data_dir, "fra.txt"), self.xmlfile] + make_xml, + ["-l", "fra", os.path.join(self.data_dir, "fra.txt"), self.xmlfile], ) def test_invoke_tok(self): diff --git a/test/test_tokenize_xml.py b/test/test_tokenize_xml.py index 84dfa8e0..4fd3599e 100755 --- a/test/test_tokenize_xml.py +++ b/test/test_tokenize_xml.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python """Unit test suite for our XML tokenizer module""" diff --git a/test/test_web_api.py b/test/test_web_api.py new file mode 100755 index 00000000..c0a2e93c --- /dev/null +++ b/test/test_web_api.py @@ -0,0 +1,426 @@ +#!/usr/bin/env python + +import os +import re +from textwrap import dedent +from unittest import main + +from basic_test_case import BasicTestCase +from fastapi.testclient import TestClient +from lxml import etree + +from readalongs.log import LOGGER +from readalongs.text.add_ids_to_xml import add_ids +from readalongs.text.convert_xml import convert_xml +from readalongs.text.tokenize_xml import tokenize_xml +from readalongs.util import get_langs +from readalongs.web_api import FormatName, create_grammar, web_api_app + +API_CLIENT = TestClient(web_api_app) + + +class TestWebApi(BasicTestCase): + def slurp_data_file(self, filename: str) -> str: + """Convenience function to slurp a whole file in self.data_dir""" + with open(os.path.join(self.data_dir, filename), encoding="utf8") as f: + return f.read().strip() + + def test_assemble_from_plain_text(self): + # Test the assemble endpoint 
+API_CLIENT = TestClient(web_api_app)
+
+
+class TestWebApi(BasicTestCase):
+    def slurp_data_file(self, filename: str) -> str:
+        """Convenience function to slurp a whole file in self.data_dir"""
+        with open(os.path.join(self.data_dir, filename), encoding="utf8") as f:
+            return f.read().strip()
+
+    def test_assemble_from_plain_text(self):
+        # Test the assemble endpoint with plain text
+        request = {
+            "text": self.slurp_data_file("ej-fra.txt"),
+            "text_languages": ["fra"],
+        }
+        response = API_CLIENT.post("/api/v1/assemble", json=request)
+        self.assertEqual(response.status_code, 200)
+
+    def test_bad_path(self):
+        # Test a request to a path that doesn't exist
+        response = API_CLIENT.get("/pathdoesntexist")
+        self.assertEqual(response.status_code, 404)
+
+    def test_bad_method(self):
+        # Test a request to a valid path with a bad method
+        response = API_CLIENT.get("/api/v1/assemble")
+        self.assertEqual(response.status_code, 405)
+
+    def test_assemble_from_xml(self):
+        # Test the assemble endpoint with XML
+        request = {
+            "encoding": "utf-8",  # for backward compat, make sure the encoding is allowed but ignored
+            "xml": self.slurp_data_file("ej-fra.xml"),
+            "text_languages": ["fra"],
+        }
+        response = API_CLIENT.post("/api/v1/assemble", json=request)
+        self.assertEqual(response.status_code, 200)
+
+    def test_bad_xml(self):
+        # Test the assemble endpoint with invalid XML
+        request = {
+            "xml": "this is not xml",
+            "text_languages": ["fra"],
+        }
+        response = API_CLIENT.post("/api/v1/assemble", json=request)
+        self.assertEqual(response.status_code, 422)
+
+    def test_create_grammar(self):
+        # Test the create grammar function
+        parsed = etree.fromstring(
+            bytes(self.slurp_data_file("ej-fra.xml"), encoding="utf8")
+        )
+        tokenized = tokenize_xml(parsed)
+        ids_added = add_ids(tokenized)
+        g2ped, valid = convert_xml(ids_added)
+        word_dict, fsg, text = create_grammar(g2ped)
+        self.assertTrue(valid)
+        self.assertIn("Auto-generated JSGF grammar", fsg)
+        self.assertEqual(len(word_dict), len(text.split()))
+        self.assertEqual(len(word_dict), 99)
+
+    def test_bad_g2p(self):
+        # Test the assemble endpoint with invalid g2p languages
+        request = {
+            "text": "blah blah",
+            "text_languages": ["test"],
+        }
+        with self.assertLogs(LOGGER, "ERROR"):
+            response = API_CLIENT.post("/api/v1/assemble", json=request)
+        self.assertEqual(response.status_code, 422)
+
+    def test_langs(self):
+        # Test the langs endpoint
+        response = API_CLIENT.get("/api/v1/langs")
+        self.assertEqual(response.json(), get_langs()[1])
+        self.assertEqual(set(response.json().keys()), set(get_langs()[0]))
+
+    def test_debug(self):
+        # Test the assemble endpoint with debug mode on
+        request = {
+            "text": self.slurp_data_file("ej-fra.txt"),
+            "debug": True,
+            "text_languages": ["fra"],
+        }
+        response = API_CLIENT.post("/api/v1/assemble", json=request)
+        content = response.json()
+        self.assertEqual(content["input"], request)
+        self.assertGreater(len(content["tokenized"]), 10)
+        self.assertGreater(len(content["parsed"]), 10)
+        self.assertGreater(len(content["g2ped"]), 10)
+
+        # Test that debug mode is off by default
+        request = {
+            "text": "Ceci est un test.",
+            "text_languages": ["fra"],
+        }
+        response = API_CLIENT.post("/api/v1/assemble", json=request)
+        content = response.json()
+        self.assertIsNone(content["input"])
+        self.assertIsNone(content["tokenized"])
+        self.assertIsNone(content["parsed"])
+        self.assertIsNone(content["g2ped"])
+
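+    # Shared fixtures for the convert_alignment tests below: a tiny two-word
+    # read-along ("hej é" / "verden à") and the SMIL alignment that maps each
+    # word's id to its clip times within an 83.1 second audio file.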
+    hej_verden_xml = dedent(
+        """\
+        <?xml version='1.0' encoding='utf-8'?>
+        <TEI>
+            <text xml:lang="dan">
+                <body>
+                    <div type="page">
+                        <p>
+                            <s id="s0"><w id="wé1">hej é</w> <w id="wé2">verden à</w></s>
+                        </p>
+                    </div>
+                </body>
+            </text>
+        </TEI>
+        """
+    )
+
+    hej_verden_smil = dedent(
+        """\
+        <smil xmlns="http://www.w3.org/ns/SMIL" version="3.0">
+            <body>
+                <par id="par-wé1">
+                    <text src="hej-verden.xml#wé1"/>
+                    <audio src="hej-verden.mp3" clipBegin="17.745" clipEnd="58.6"/>
+                </par>
+                <par id="par-wé2">
+                    <text src="hej-verden.xml#wé2"/>
+                    <audio src="hej-verden.mp3" clipBegin="58.6" clipEnd="82.19"/>
+                </par>
+            </body>
+        </smil>
+        """
+    )
+
+    def test_convert_to_TextGrid(self):
+        request = {
+            "audio_duration": 83.1,
+            "xml": self.hej_verden_xml,
+            "smil": self.hej_verden_smil,
+        }
+        response = API_CLIENT.post("/api/v1/convert_alignment/textgrid", json=request)
+        self.assertEqual(response.status_code, 200)
+        self.assertIn("aligned.TextGrid", response.headers["content-disposition"])
+        self.assertEqual(
+            response.text,
+            dedent(
+                """\
+                File type = "ooTextFile"
+                Object class = "TextGrid"
+
+                xmin = 0.000000
+                xmax = 83.100000
+                tiers? <exists>
+                size = 2
+                item []:
+                    item [1]:
+                        class = "IntervalTier"
+                        name = "Sentence"
+                        xmin = 0.000000
+                        xmax = 83.100000
+                        intervals: size = 3
+                        intervals [1]:
+                            xmin = 0.000000
+                            xmax = 17.745000
+                            text = ""
+                        intervals [2]:
+                            xmin = 17.745000
+                            xmax = 82.190000
+                            text = "hej é verden à"
+                        intervals [3]:
+                            xmin = 82.190000
+                            xmax = 83.100000
+                            text = ""
+                    item [2]:
+                        class = "IntervalTier"
+                        name = "Word"
+                        xmin = 0.000000
+                        xmax = 83.100000
+                        intervals: size = 4
+                        intervals [1]:
+                            xmin = 0.000000
+                            xmax = 17.745000
+                            text = ""
+                        intervals [2]:
+                            xmin = 17.745000
+                            xmax = 58.600000
+                            text = "hej é"
+                        intervals [3]:
+                            xmin = 58.600000
+                            xmax = 82.190000
+                            text = "verden à"
+                        intervals [4]:
+                            xmin = 82.190000
+                            xmax = 83.100000
+                            text = ""
+                """
+            ),
+        )
+
+    def test_convert_to_eaf(self):
+        request = {
+            "audio_duration": 83.1,
+            "xml": self.hej_verden_xml,
+            "smil": self.hej_verden_smil,
+        }
+        response = API_CLIENT.post("/api/v1/convert_alignment/eaf", json=request)
+        self.assertEqual(response.status_code, 200)
+        self.assertIn("<ANNOTATION_DOCUMENT", response.text)
+        self.assertIn("aligned.eaf", response.headers["content-disposition"])
+
+    def test_convert_to_srt(self):
+        request = {
+            "audio_duration": 83.1,
+            "xml": self.hej_verden_xml,
+            "smil": self.hej_verden_smil,
+        }
+        response = API_CLIENT.post(
+            "/api/v1/convert_alignment/srt?tier=sentence", json=request
+        )
+        self.assertEqual(response.status_code, 200)
+        self.assertIn("aligned_sentences.srt", response.headers["content-disposition"])
+        self.assertEqual(
+            response.text.replace("\r", ""),  # CRLF->LF, in case we're on Windows.
+            dedent(
+                """\
+                1
+                00:00:17,745 --> 00:01:22,190
+                hej é verden à
+
+                """
+            ),
+        )
+
+        response = API_CLIENT.post(
+            "/api/v1/convert_alignment/srt?tier=word", json=request
+        )
+        self.assertEqual(response.status_code, 200)
+        self.assertIn("aligned_words.srt", response.headers["content-disposition"])
+        self.assertEqual(
+            response.text.replace("\r", ""),  # CRLF->LF, in case we're on Windows.
+            dedent(
+                """\
+                1
+                00:00:17,745 --> 00:00:58,600
+                hej é
+
+                2
+                00:00:58,600 --> 00:01:22,190
+                verden à
+
+                """
+            ),
+        )
+
+    def test_convert_to_vtt(self):
+        request = {
+            "encoding": "utf-8",  # for backward compat, make sure the encoding is allowed but ignored
+            "audio_duration": 83.1,
+            "xml": self.hej_verden_xml,
+            "smil": self.hej_verden_smil,
+        }
+        response = API_CLIENT.post(
+            "/api/v1/convert_alignment/vtt?tier=sentence", json=request
+        )
+        self.assertEqual(response.status_code, 200)
+        self.assertIn("aligned_sentences.vtt", response.headers["content-disposition"])
+        self.assertEqual(
+            response.text.replace("\r", ""),  # CRLF->LF, in case we're on Windows.
+            dedent(
+                """\
+                WEBVTT
+
+                00:00:17.745 --> 00:01:22.190
+                hej é verden à
+                """
+            ),
+        )
+
+        response = API_CLIENT.post(
+            "/api/v1/convert_alignment/vtt?tier=word", json=request
+        )
+        self.assertEqual(response.status_code, 200)
+        self.assertIn("aligned_words.vtt", response.headers["content-disposition"])
+        self.assertEqual(
+            response.text.replace("\r", ""),  # CRLF->LF, in case we're on Windows.
+            dedent(
+                """\
+                WEBVTT
+
+                00:00:17.745 --> 00:00:58.600
+                hej é
+
+                00:00:58.600 --> 00:01:22.190
+                verden à
+                """
+            ),
+        )
+
+    def test_convert_to_TextGrid_errors(self):
+        request = {
+            "audio_duration": 83.1,
+            "xml": "this is not XML",
+            "smil": self.hej_verden_smil,
+        }
+        response = API_CLIENT.post("/api/v1/convert_alignment/textgrid", json=request)
+        self.assertEqual(response.status_code, 422, "Invalid XML should fail.")
+
+        request = {
+            "audio_duration": 83.1,
+            "xml": self.hej_verden_xml,
+            "smil": "This is not SMIL",
+        }
+        response = API_CLIENT.post("/api/v1/convert_alignment/textgrid", json=request)
+        self.assertEqual(response.status_code, 422, "Invalid SMIL should fail.")
+
+        request = {
+            "audio_duration": -10.0,
+            "xml": self.hej_verden_xml,
+            "smil": self.hej_verden_smil,
+        }
+        response = API_CLIENT.post("/api/v1/convert_alignment/textgrid", json=request)
+        self.assertEqual(response.status_code, 422, "Negative duration should fail.")
+
+    def test_cleanup_temp_dir(self):
+        """Make sure convert's temporary directory actually gets deleted."""
+        request = {
+            "audio_duration": 83.1,
+            "xml": self.hej_verden_xml,
+            "smil": self.hej_verden_smil,
+        }
+        with self.assertLogs(LOGGER, "INFO") as log_cm:
+            response = API_CLIENT.post(
+                "/api/v1/convert_alignment/textgrid", json=request
+            )
+        self.assertEqual(response.status_code, 200)
+        # print(log_cm.output)
+        match = re.search(
+            "Temporary directory: (.*)($|\r|\n)", "\n".join(log_cm.output)
+        )
+        self.assertIsNotNone(match)
+        self.assertFalse(os.path.isdir(match[1]))
+
+    def test_cleanup_even_if_error(self):
+        # This is seriously white-box testing... this XML has IDs that don't
+        # match those in the SMIL file, which will cause an exception deeper
+        # in the code, after the temporary directory has been created.  Here
+        # we exercise catching that exception in a sane way, with a 422 status
+        # code, while also making sure the temporary directory gets deleted.
+        mismatch_xml = dedent(
+            """\
+            <?xml version='1.0' encoding='utf-8'?>
+            <TEI>
+                <text xml:lang="dan">
+                    <body>
+                        <div type="page">
+                            <p>
+                                <s id="s0"><w id="mismatch1">hej é</w> <w id="mismatch2">verden à</w></s>
+                            </p>
+                        </div>
+                    </body>
+                </text>
+            </TEI>
+            """
+        )
+        request = {
+            "audio_duration": 83.1,
+            "xml": mismatch_xml,
+            "smil": self.hej_verden_smil,
+        }
+        for format_name in FormatName:
+            with self.assertLogs(LOGGER, "INFO") as log_cm:
+                response = API_CLIENT.post(
+                    f"/api/v1/convert_alignment/{format_name.value}", json=request
+                )
+            self.assertEqual(response.status_code, 422)
+            # print(log_cm.output)
+            match = re.search(
+                "Temporary directory: (.*)($|\r|\n)", "\n".join(log_cm.output)
+            )
+            self.assertIsNotNone(match)
+            self.assertFalse(os.path.isdir(match[1]))
+
+    def test_convert_to_bad_format(self):
+        request = {
+            "audio_duration": 83.1,
+            "xml": self.hej_verden_xml,
+            "smil": self.hej_verden_smil,
+        }
+        response = API_CLIENT.post("/api/v1/convert_alignment/badformat", json=request)
+        self.assertEqual(response.status_code, 422)
+
+        request = {
+            "audio_duration": 83.1,
+            "xml": self.hej_verden_xml,
+            "smil": self.hej_verden_smil,
+        }
+        response = API_CLIENT.post("/api/v1/convert_alignment", json=request)
+        self.assertEqual(response.status_code, 404)
+
+        response = API_CLIENT.post(
+            "/api/v1/convert_alignment/vtt?tier=badtier", json=request
+        )
+        self.assertEqual(response.status_code, 422)
+
+
+if __name__ == "__main__":
+    main()