Skip to content

Commit

Permalink
ci: add VCR caching to the notebooks (#1704)
Browse files Browse the repository at this point in the history
  • Loading branch information
vbarda authored Sep 12, 2024
1 parent 95b97d6 commit 0b40c83
Show file tree
Hide file tree
Showing 135 changed files with 32,406 additions and 25 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/codespell.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
- name: Codespell
uses: codespell-project/actions-codespell@v2
with:
skip: '*.ambr,*.lock,*.ipynb'
skip: '*.ambr,*.lock,*.ipynb,*.yaml'
ignore_words_list: ${{ steps.extract_ignore_words.outputs.ignore_words_list }}
# We do this to avoid spellchecking cell outputs
- name: Codespell Notebooks
Expand Down
12 changes: 9 additions & 3 deletions .github/workflows/run_notebooks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name: Run notebooks
on:
workflow_dispatch:
schedule:
- cron: '0 13 * * *'
- cron: '0 13 * * *'

jobs:
build:
Expand Down Expand Up @@ -33,11 +33,17 @@ jobs:
run: make start-services

- name: Prepare notebooks
if: ${{ matrix.lib-version == 'development' }}
run: poetry run python docs/_scripts/prepare_notebooks_for_ci.py
run: |
if [ "${{ matrix.lib-version }}" = "development" ]; then
poetry run python docs/_scripts/prepare_notebooks_for_ci.py --comment-install-cells
else
poetry run python docs/_scripts/prepare_notebooks_for_ci.py
fi
- name: Run notebooks
env:
# these won't actually be used because of the VCR cassettes,
# but they need to be set to avoid triggering getpass()
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
TAVILY_API_KEY: ${{ secrets.TAVILY_API_KEY }}
Expand Down
128 changes: 108 additions & 20 deletions docs/_scripts/prepare_notebooks_for_ci.py
Original file line number Diff line number Diff line change
@@ -1,44 +1,132 @@
"""Preprocess notebooks for CI. Currently removes pip install cells."""
"""Preprocess notebooks for CI. Currently adds VCR cassettes and optionally removes pip install cells."""

import os
import json
import logging
import os

import click
import nbformat

logger = logging.getLogger(__name__)
NOTEBOOK_DIRS = ("docs/docs/how-tos",)
DOCS_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
CASSETTES_PATH = os.path.join(DOCS_PATH, "cassettes")

NOTEBOOKS_NO_CASSETTES = (
"docs/docs/how-tos/visualization.ipynb",
"docs/docs/how-tos/many-tools.ipynb"
)


def comment_install_cells(notebook: nbformat.NotebookNode) -> nbformat.NotebookNode:
    """Comment out code cells that contain a ``pip install`` command.

    Every non-blank line of a matching code cell is prefixed with ``# ``;
    blank (or whitespace-only) lines are kept exactly as they are. Cells of
    any other type, and code cells that never mention ``pip install``, are
    left untouched.

    Args:
        notebook: Notebook whose cells are rewritten in place.

    Returns:
        The same ``notebook`` object that was passed in.
    """
    code_cells = (entry for entry in notebook.cells if entry.cell_type == "code")
    for code_cell in code_cells:
        if "pip install" not in code_cell.source:
            continue

        rewritten = []
        for text in code_cell.source.splitlines():
            # Only lines with visible content receive the comment marker.
            if text.strip():
                text = f"# {text}"
            rewritten.append(text)
        code_cell.source = "\n".join(rewritten)

    return notebook


def is_magic_command(code: str) -> bool:
    """Return whether ``code`` starts with an IPython magic or shell escape.

    Leading whitespace is ignored. A line counts as a magic command when its
    first visible character is ``%`` (this also covers ``%%`` cell magics) or
    ``!``.

    Args:
        code: A single line of cell source.

    Returns:
        ``True`` if the line begins with ``%`` or ``!``, ``False`` otherwise
        (including for empty or whitespace-only input).
    """
    # One strip and one tuple-prefix check instead of two strips and an `or`.
    return code.lstrip().startswith(("%", "!"))


def is_comment(code: str) -> bool:
    """Return whether ``code`` is a ``#`` comment line.

    Leading whitespace is ignored; empty or whitespace-only input is not a
    comment.
    """
    first_visible = code.lstrip()[:1]
    return first_visible == "#"


def remove_install_cells(notebook_path: str) -> None:
with open(notebook_path, "r") as file:
notebook = json.load(file)
def add_vcr_to_notebook(
    notebook: nbformat.NotebookNode, cassette_prefix: str
) -> nbformat.NotebookNode:
    """Wrap each code cell of the notebook in ``with vcr.use_cassette(...)``.

    A code cell is left unchanged when it is empty, consists only of magic /
    shell-escape lines, or consists only of comments and blank lines. Every
    other code cell has its lines indented under a ``with vcr.use_cassette``
    statement whose cassette file is ``f"{cassette_prefix}_{cell_id}.yaml"``,
    where ``cell_id`` is the cell's ``id`` or, if it has none, its index in
    the notebook. Finally a new first cell is inserted that imports ``vcr``
    and applies ``nest_asyncio``.

    Args:
        notebook: Notebook to modify in place.
        cassette_prefix: Path prefix used to build each cell's cassette name.

    Returns:
        The same ``notebook`` object that was passed in.

    Raises:
        ValueError: If a code cell mixes magic lines with non-magic lines.
    """
    # Inject VCR context manager into each code cell
    for idx, cell in enumerate(notebook.cells):
        if cell.cell_type != "code":
            continue

        lines = cell.source.splitlines()
        # skip if empty cell
        if not lines:
            continue

        are_magic_lines = [is_magic_command(line) for line in lines]

        # skip if all magic
        if all(are_magic_lines):
            continue

        # Magic lines cannot be nested under a `with` statement together with
        # regular code, so a partially-magic cell is rejected outright.
        if any(are_magic_lines):
            raise ValueError(
                "Cannot process code cells with mixed magic and non-magic code."
            )

        # skip if just comments
        if all(is_comment(line) or not line.strip() for line in lines):
            continue

        cell_id = cell.get("id", idx)
        cassette_name = f"{cassette_prefix}_{cell_id}.yaml"
        # `!r` emits a correctly quoted and escaped string literal, so paths
        # containing backslashes or quotes still produce valid Python. For
        # ordinary paths the output is the same single-quoted literal as before.
        header = (
            f"with vcr.use_cassette({cassette_name!r}, "
            "filter_headers=['x-api-key', 'authorization'], record_mode='once'):\n"
        )
        # Any consistent indent is valid under the `with` statement.
        cell.source = header + "\n".join(f" {line}" for line in lines)

    # Add import statement
    vcr_import_lines = [
        "import vcr",
        # this is needed for ChatAnthropic
        "import nest_asyncio",
        "nest_asyncio.apply()",
    ]
    import_cell = nbformat.v4.new_code_cell(source="\n".join(vcr_import_lines))
    # NOTE(review): presumably the generated id is dropped so the injected cell
    # matches notebooks whose cells carry no ids - confirm.
    import_cell.pop("id", None)
    notebook.cells.insert(0, import_cell)
    return notebook


def process_notebooks(should_comment_install_cells: bool) -> None:
    """Rewrite every notebook under ``NOTEBOOK_DIRS`` in place for CI.

    Each ``.ipynb`` file found outside ``ipynb_checkpoints`` directories is
    read, optionally has its ``pip install`` cells commented out, is wrapped
    with VCR cassettes unless it is listed in ``NOTEBOOKS_NO_CASSETTES``, and
    is written back to the same path. A failure on one notebook is logged
    (with traceback) and does not stop the remaining notebooks.

    Args:
        should_comment_install_cells: Whether to comment out cells containing
            ``pip install`` before adding the cassettes.
    """
    for directory in NOTEBOOK_DIRS:
        for root, _, files in os.walk(directory):
            if "ipynb_checkpoints" in root:
                continue

            for file in files:
                if not file.endswith(".ipynb"):
                    continue

                notebook_path = os.path.join(root, file)
                try:
                    notebook = nbformat.read(notebook_path, as_version=4)

                    if should_comment_install_cells:
                        notebook = comment_install_cells(notebook)

                    # The skip list is written with "/" separators; normalise
                    # the joined path so the lookup also matches when os.sep
                    # differs.
                    comparable_path = notebook_path.replace(os.sep, "/")
                    if comparable_path not in NOTEBOOKS_NO_CASSETTES:
                        # `file` is already a bare file name from os.walk.
                        base_filename = os.path.splitext(file)[0]
                        cassette_prefix = os.path.join(CASSETTES_PATH, base_filename)
                        notebook = add_vcr_to_notebook(
                            notebook, cassette_prefix=cassette_prefix
                        )

                    nbformat.write(notebook, notebook_path)
                    logger.info("Processed: %s", notebook_path)
                except Exception as e:
                    # Best-effort per notebook: keep going, but record the full
                    # traceback so the failure can be diagnosed from CI logs.
                    logger.exception("Error processing %s: %s", notebook_path, e)


if __name__ == "__main__":
process_notebooks()
@click.command()
@click.option(
    "--comment-install-cells",
    is_flag=True,
    default=False,
    help="Whether to comment out install cells",
)
def main(comment_install_cells):
    """Command-line entry point: preprocess all notebooks for CI.

    Args:
        comment_install_cells: Value of the ``--comment-install-cells`` flag;
            forwarded to ``process_notebooks``.
    """
    # Without a configured handler the INFO-level progress messages emitted by
    # this script are silently dropped. basicConfig is a no-op if the root
    # logger already has handlers.
    logging.basicConfig(level=logging.INFO)
    process_notebooks(should_comment_install_cells=comment_install_cells)
    # process_notebooks logs and swallows per-notebook failures, so this line
    # must not claim that every notebook succeeded.
    logger.info("Finished processing notebooks.")


if __name__ == "__main__":
main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
interactions:
- request:
body: '{"max_tokens": 1024, "messages": [{"role": "user", "content": "hi! I''m
bob"}, {"role": "assistant", "content": "It''s nice to meet you Bob! I''m an
AI assistant created by Anthropic. I''m here to help with any questions or tasks
you might have. How can I assist you today?"}, {"role": "user", "content": "what''s
my name?"}, {"role": "assistant", "content": "I''m afraid I don''t actually
know your name. As an AI assistant, I don''t have any prior knowledge about
you or your identity. I only know what you''ve directly told me, which is that
your name is Bob. Please let me know if you have any other questions I can help
with!"}, {"role": "user", "content": "i like the celtics!"}, {"role": "assistant",
"content": "Okay, got it! You like the Boston Celtics basketball team. That''s
great, the Celtics have a rich history and are a popular NBA franchise. Do you
follow them closely? What do you like most about the team? I''m always happy
to discuss sports and learn more about the interests of the people I talk to."},
{"role": "user", "content": "i like how much they win"}], "model": "claude-3-haiku-20240307"}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate
anthropic-version:
- '2023-06-01'
connection:
- keep-alive
content-length:
- '1106'
content-type:
- application/json
host:
- api.anthropic.com
user-agent:
- Anthropic/Python 0.34.2
x-stainless-arch:
- arm64
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- MacOS
x-stainless-package-version:
- 0.34.2
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.11.9
method: POST
uri: https://api.anthropic.com/v1/messages
response:
body:
string: !!binary |
H4sIAAAAAAAAA0yST2sbQQzFv4qYSy+bYOcPIXspTQOlFApN20NbilFmZa/iWWkz0tjZhHz3MuuE
5jQgPT3p95inwF1ow2Cb1WJ5dvb47Zzub+7kcXl68/sL/7z+9ek8NMGnkaqKzHBDoQlZUy2gGZuj
eGjCoB2l0IaYsHR0dHrUI2/L0cni5GxxurgITYgqTuKh/fP06uj0UGfnpw2fIaJAkY5yde3AewIc
R8IEuoas6iwbWGsGBCsxktm6JHDCARJvaR64UnMV+EjJOdox/Ohpgh53BLdEAipUzapyUHPodGBB
cVhnlNizkQELfL36AD2ba54a2LP3sLyA2OMwsor1PIKzJ7Jj+E5Ur5q0ZFjjTjM7HU6KKjUfEk8T
7FmgMkUdRnKaKd762Qx/S7CjPIGhs62naowGCGuUmeQV6131k9r2jB0768HdijmyUAf0ECklkkig
O8ozcEcRu8pnEClXYZpgxOw1kX2PDgNuyap2qAH3gAIcVTgeiFhmn0S4KXQM11qxD+Hif/iXG2FM
OFEGzWCEpjKL6zqOJWFOE5Dc6UQd7NFjP9PUzXuW9+H5bxPMdVzleTa0gaRbeckSXhpG96XyhVZK
Sk0o8+dsnwLLWHzluiWx0J4sL5ugxd/WlovL5+d/AAAA//8DACqDJpL8AgAA
headers:
CF-Cache-Status:
- DYNAMIC
CF-RAY:
- 8c225edaaa368c3b-EWR
Connection:
- keep-alive
Content-Encoding:
- gzip
Content-Type:
- application/json
Date:
- Thu, 12 Sep 2024 19:40:40 GMT
Server:
- cloudflare
Transfer-Encoding:
- chunked
X-Robots-Tag:
- none
request-id:
- req_01VHhQUqymq7SujsCGRAw7pE
via:
- 1.1 google
x-cloud-trace-context:
- 587824031142d6a2dfae0345c152f033
status:
code: 200
message: OK
- request:
body: '{"max_tokens": 1024, "messages": [{"role": "user", "content": "hi! I''m
bob"}, {"role": "assistant", "content": "It''s nice to meet you Bob! I''m an
AI assistant created by Anthropic. I''m here to help with any questions or tasks
you might have. How can I assist you today?"}, {"role": "user", "content": "what''s
my name?"}, {"role": "assistant", "content": "I''m afraid I don''t actually
know your name. As an AI assistant, I don''t have any prior knowledge about
you or your identity. I only know what you''ve directly told me, which is that
your name is Bob. Please let me know if you have any other questions I can help
with!"}, {"role": "user", "content": "i like the celtics!"}, {"role": "assistant",
"content": "Okay, got it! You like the Boston Celtics basketball team. That''s
great, the Celtics have a rich history and are a popular NBA franchise. Do you
follow them closely? What do you like most about the team? I''m always happy
to discuss sports and learn more about the interests of the people I talk to."},
{"role": "user", "content": "i like how much they win"}, {"role": "assistant",
"content": "I can understand the appeal of rooting for a successful team like
the Boston Celtics. They have been one of the most dominant franchises in NBA
history, with 17 championship titles. Seeing your favorite team consistently
win and compete for championships can be very satisfying as a fan. The Celtics''
winning tradition and sustained excellence over the decades is certainly part
of what makes them such an iconic team in the league. Do you have a favorite
Celtics player or season you particularly enjoyed watching them win?"}, {"role":
"user", "content": "Create a summary of the conversation above:"}], "model":
"claude-3-haiku-20240307"}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate
anthropic-version:
- '2023-06-01'
connection:
- keep-alive
content-length:
- '1744'
content-type:
- application/json
host:
- api.anthropic.com
user-agent:
- Anthropic/Python 0.34.2
x-stainless-arch:
- arm64
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- MacOS
x-stainless-package-version:
- 0.34.2
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.11.9
method: POST
uri: https://api.anthropic.com/v1/messages
response:
body:
string: !!binary |
H4sIAAAAAAAAA2xT22ocMQz9FeGXvEyWTbbQdt6SUmhaKLQUSm8Era1du+uRppaczTTk34tnk+2F
PtlY0tE5R/KdS8H1btDt9fJs9fLNu5+v+PL1h58/zuk9337Wl89XrnM2jdSySBW35DpXJLcHVE1q
yOY6N0ig7HrnM9ZAp6vTiGlXT8+X50+Wq+VT1zkvbMTm+i93j4hGt612Pnr3igpBUkDQOgxYJpAN
WCTwwjdUFC0JgwpssPRf+St/+De2pi0y7JNFGAgSW5FQfeItDJNS3gAqIMPFFRypgy+ERgHWE1yw
xSJj8gv4JLW1ZjDJoYFNUgswDjPDS1kvGoGPLWOSCqg7CrCPaH8ldnAFPmNJm0QBrIWvIKTAJwYR
bwiQJxhLkgI7ln2msCXAtdQZpgPkAMJ5gh3T/ggPEQOEVMhbnh4JdrCPyUfYox4a/ZfwURbdjoVU
H1k11Jx2NNt9KWrC8IKyJa+wbuJsjTmDEQ4HVq1AMYXflVH2MFQfG8QE+8QLuAL0R11hxn4APYHS
yMakJmWaEbWqYWJqN+9JtQMWa7OzSKnA2VN4e3kBPuIwJmGNadQDl4P5aXP0BmGDN1KSHfvBmHGi
AlJACVUOUyMdySfMeQLi7zJRWDyuldxQaYr/Wq+IChvxtdkmDFuyAz+ZpweEPoJYpNIddnA2KWJp
SfM4EhsVUoPEf7pxkDEQFNJROLT89QQhqa+qDxbM5p8ooPcyjDlpHIhN59JMW/TTwt1/65yajNdl
Ful6RxyurRZ2DwGlH5XYk+u55ty5Ov/o/s4lHqtdm+yI1fWr1fPOSbU/386eLe/vfwEAAP//AwCu
w+suMQQAAA==
headers:
CF-Cache-Status:
- DYNAMIC
CF-RAY:
- 8c225ee28b878c3b-EWR
Connection:
- keep-alive
Content-Encoding:
- gzip
Content-Type:
- application/json
Date:
- Thu, 12 Sep 2024 19:40:42 GMT
Server:
- cloudflare
Transfer-Encoding:
- chunked
X-Robots-Tag:
- none
request-id:
- req_01Hn7UtUhHZJSyz1gqxAMBfc
via:
- 1.1 google
x-cloud-trace-context:
- 3dc4bb1338ba2e9b0e95217e8f3a6c9d
status:
code: 200
message: OK
version: 1
Loading

0 comments on commit 0b40c83

Please sign in to comment.