From e87d3adb4c4b2b1c3f9c317351a6cd6ae050d79c Mon Sep 17 00:00:00 2001 From: Vadym Barda Date: Fri, 13 Sep 2024 12:56:23 -0400 Subject: [PATCH] docs: add a README for updating notebooks/cassettes (#1705) --- .github/workflows/run_notebooks.yml | 6 +-- docs/README.md | 61 +++++++++++++++++++++++++++++ docs/_scripts/execute_notebooks.sh | 5 +++ 3 files changed, 67 insertions(+), 5 deletions(-) create mode 100644 docs/README.md create mode 100755 docs/_scripts/execute_notebooks.sh diff --git a/.github/workflows/run_notebooks.yml b/.github/workflows/run_notebooks.yml index 32dafdbdd..de2d15131 100644 --- a/.github/workflows/run_notebooks.yml +++ b/.github/workflows/run_notebooks.yml @@ -49,11 +49,7 @@ jobs: TAVILY_API_KEY: ${{ secrets.TAVILY_API_KEY }} LANGSMITH_API_KEY: ${{ secrets.LANGSMITH_API_KEY }} run: | - for file in $(find docs/docs/how-tos -name "*.ipynb") - do - echo "Executing $file" - PIP_PRE=1 poetry run jupyter execute "$file" - done + ./docs/_scripts/execute_notebooks.sh - name: Stop services run: make stop-services diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 000000000..df9d99882 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,61 @@ +# Setup + +To set up the requirements for building the docs, you can run: + +```bash +poetry install --with test +``` + +## Serving documentation locally + +To run the documentation server locally, you can run: + +```bash +make serve-docs +``` + +## Execute notebooks + +If you would like to automatically execute all of the notebooks, to mimic the "Run notebooks" GHA, you can run: + +```bash +python docs/_scripts/prepare_notebooks_for_ci.py +./docs/_scripts/execute_notebooks.sh +``` + +**Note**: if you want to run the notebooks without `%pip install` cells, you can run: + +```bash +python docs/_scripts/prepare_notebooks_for_ci.py --comment-install-cells +./docs/_scripts/execute_notebooks.sh +``` + +The `prepare_notebooks_for_ci.py` script adds a VCR cassette context manager to each cell in the notebook, so that: 
+* when the notebook is run for the first time, cells with network requests will be recorded to a VCR cassette file +* when the notebook is run subsequently, the cells with network requests will be replayed from the cassettes + +**Note**: this is currently limited only to the notebooks in `docs/docs/how-tos` + +## Adding new notebooks + +If you are adding a notebook with API requests, it's **recommended** to record network requests so that they can be subsequently replayed. If this is not done, the notebook runner will make API requests every time the notebook is run, which can be costly and slow. + +To record network requests, please make sure to first run the `prepare_notebooks_for_ci.py` script. + +Then, run + +```bash +jupyter execute <path_to_notebook> +``` + +Once the notebook is executed, you should see the new VCR cassettes recorded in the `docs/cassettes` directory, and you can discard the updated notebook. + +## Updating existing notebooks + +If you are updating an existing notebook, please make sure to remove any existing cassettes for the notebook in the `docs/cassettes` directory (each cassette is prefixed with the notebook name), and then run the steps from the "Adding new notebooks" section above. + +To delete cassettes for a notebook, you can run: + +```bash +rm docs/cassettes/<notebook_name>* +``` \ No newline at end of file diff --git a/docs/_scripts/execute_notebooks.sh b/docs/_scripts/execute_notebooks.sh new file mode 100755 index 000000000..0d1bb576f --- /dev/null +++ b/docs/_scripts/execute_notebooks.sh @@ -0,0 +1,5 @@ +for file in $(find docs/docs/how-tos -name "*.ipynb" | grep -v ".ipynb_checkpoints") +do + echo "Executing $file" + poetry run jupyter execute "$file" +done \ No newline at end of file