-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
ci: add VCR caching to the notebooks (#1704)
- Loading branch information
Showing
135 changed files
with
32,406 additions
and
25 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,44 +1,132 @@ | ||
"""Preprocess notebooks for CI. Currently removes pip install cells.""" | ||
"""Preprocess notebooks for CI. Currently adds VCR cassettes and optionally removes pip install cells.""" | ||
|
||
import os | ||
import json | ||
import logging | ||
import os | ||
|
||
import click | ||
import nbformat | ||
|
||
# Module-level logger for this script.
logger = logging.getLogger(__name__)

# Directories that are walked recursively when looking for notebooks.
NOTEBOOK_DIRS = ("docs/docs/how-tos",)

# DOCS_PATH is the parent of the directory that contains this script;
# recorded cassettes live in its "cassettes" sub-directory.
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
DOCS_PATH = os.path.dirname(_SCRIPT_DIR)
CASSETTES_PATH = os.path.join(DOCS_PATH, "cassettes")

# Notebooks that are never wrapped in VCR cassettes.
NOTEBOOKS_NO_CASSETTES = (
    "docs/docs/how-tos/visualization.ipynb",
    "docs/docs/how-tos/many-tools.ipynb",
)
|
||
|
||
def comment_install_cells(notebook: nbformat.NotebookNode) -> nbformat.NotebookNode:
    """Comment out every code cell that contains ``pip install``.

    Each non-blank line of a matching cell is prefixed with ``"# "``. Lines
    that are already comments are left untouched, so running the function
    again on an already-processed notebook does not stack further ``#``
    prefixes.

    Args:
        notebook: Notebook whose cells are modified in place.

    Returns:
        The same notebook object that was passed in.
    """
    for cell in notebook.cells:
        if cell.cell_type != "code" or "pip install" not in cell.source:
            continue

        # Blank lines stay blank and existing comments stay as they are;
        # everything else in the cell is turned into a comment.
        cell.source = "\n".join(
            line
            if not line.strip() or line.lstrip().startswith("#")
            else f"# {line}"
            for line in cell.source.splitlines()
        )

    return notebook
|
||
|
||
def is_magic_command(code: str) -> bool:
    """Return True if *code*, ignoring surrounding whitespace, starts with ``%`` or ``!``."""
    stripped = code.strip()
    return stripped.startswith(("%", "!"))
|
||
|
||
def is_comment(code: str) -> bool:
    """Return True if the first non-whitespace character of *code* is ``#``."""
    return code.lstrip()[:1] == "#"
|
||
|
||
def remove_install_cells(notebook_path: str) -> None: | ||
with open(notebook_path, "r") as file: | ||
notebook = json.load(file) | ||
def add_vcr_to_notebook(
    notebook: nbformat.NotebookNode, cassette_prefix: str
) -> nbformat.NotebookNode:
    """Inject `with vcr.cassette` into each code cell of the notebook.

    Every code cell containing regular Python is rewritten so that its body
    runs inside ``with vcr.use_cassette(...)``. Cells that hold nothing but
    blank lines, comments and/or magic (``%``) or shell (``!``) lines are
    left unchanged. A setup cell that imports ``vcr`` and applies
    ``nest_asyncio`` is inserted as the first cell.

    Args:
        notebook: Notebook to modify in place.
        cassette_prefix: Prefix of the cassette file names; the cassette for
            a cell is ``{cassette_prefix}_{cell_id}.yaml``, where ``cell_id``
            is the cell's ``id`` or, if it has none, its index.

    Returns:
        The same notebook object that was passed in.

    Raises:
        ValueError: If a code cell mixes magic/shell lines with regular code.
    """
    # Inject VCR context manager into each code cell
    for idx, cell in enumerate(notebook.cells):
        if cell.cell_type != "code":
            continue

        lines = cell.source.splitlines()

        # Blank lines and comments are neither magic nor code. Classifying
        # only the remaining lines keeps an all-magic cell that happens to
        # contain a blank line or a comment from being rejected as "mixed".
        significant_lines = [
            line for line in lines if line.strip() and not is_comment(line)
        ]

        # skip if empty cell or just comments
        if not significant_lines:
            continue

        are_magic_lines = [is_magic_command(line) for line in significant_lines]

        # skip if all magic
        if all(are_magic_lines):
            continue

        if any(are_magic_lines):
            raise ValueError(
                "Cannot process code cells with mixed magic and non-magic code."
            )

        cell_id = cell.get("id", idx)
        cassette_name = f"{cassette_prefix}_{cell_id}.yaml"
        # repr() yields a correctly escaped string literal even when the
        # path contains backslashes or quote characters.
        header = (
            f"with vcr.use_cassette({cassette_name!r}, "
            "filter_headers=['x-api-key', 'authorization'], record_mode='once'):"
        )
        # NOTE: the indentation is applied textually, so the contents of
        # multi-line string literals in the cell gain the same leading spaces.
        cell.source = "\n".join([header, *(f"    {line}" for line in lines)])

    # Add import statement
    vcr_import_lines = [
        "import vcr",
        # this is needed for ChatAnthropic
        "import nest_asyncio",
        "nest_asyncio.apply()",
    ]
    import_cell = nbformat.v4.new_code_cell(source="\n".join(vcr_import_lines))
    # NOTE(review): presumably the id is dropped so the written notebook does
    # not get a freshly generated cell id on every run - confirm.
    import_cell.pop("id", None)
    notebook.cells.insert(0, import_cell)
    return notebook
|
||
|
||
def process_notebooks(should_comment_install_cells: bool) -> None:
    """Rewrite every notebook found under ``NOTEBOOK_DIRS`` in place.

    Each ``.ipynb`` file outside of checkpoint directories is read, optionally
    has its ``pip install`` cells commented out, is wrapped in VCR cassettes
    unless it is listed in ``NOTEBOOKS_NO_CASSETTES``, and is written back to
    the same path. A failure on one notebook is logged and does not stop the
    remaining notebooks from being processed.

    Args:
        should_comment_install_cells: Whether to comment out cells that
            contain ``pip install`` before adding the cassettes.
    """
    for directory in NOTEBOOK_DIRS:
        for root, _, files in os.walk(directory):
            if "ipynb_checkpoints" in root:
                continue

            for file in files:
                if not file.endswith(".ipynb"):
                    continue

                notebook_path = os.path.join(root, file)
                try:
                    notebook = nbformat.read(notebook_path, as_version=4)

                    if should_comment_install_cells:
                        notebook = comment_install_cells(notebook)

                    # The exclusion list is written with forward slashes;
                    # normalise the separator so it also matches where
                    # os.sep differs.
                    comparable_path = notebook_path.replace(os.sep, "/")
                    if comparable_path not in NOTEBOOKS_NO_CASSETTES:
                        base_filename = os.path.splitext(file)[0]
                        cassette_prefix = os.path.join(CASSETTES_PATH, base_filename)
                        notebook = add_vcr_to_notebook(
                            notebook, cassette_prefix=cassette_prefix
                        )

                    nbformat.write(notebook, notebook_path)
                    logger.info("Processed: %s", notebook_path)
                except Exception as e:
                    # Best effort: keep going with the other notebooks, but
                    # record the traceback so the failure can be diagnosed.
                    logger.exception("Error processing %s: %s", notebook_path, e)
|
||
|
||
if __name__ == "__main__": | ||
process_notebooks() | ||
@click.command()
@click.option(
    "--comment-install-cells",
    is_flag=True,
    default=False,
    help="Whether to comment out install cells",
)
def main(comment_install_cells):
    """Preprocess the documentation notebooks for CI."""
    # Without a configured handler the INFO-level progress messages of this
    # script are discarded, because logging's default threshold is WARNING.
    logging.basicConfig(level=logging.INFO)
    process_notebooks(should_comment_install_cells=comment_install_cells)
    # NOTE: per-notebook failures are logged by process_notebooks rather than
    # raised, so this line is reached even when some notebooks failed.
    logger.info("All notebooks processed successfully.")


if __name__ == "__main__":
    main()
190 changes: 190 additions & 0 deletions
190
docs/cassettes/add-summary-conversation-history_048805a4-3d97-4e76-ac45-8d80d4364c46.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,190 @@ | ||
interactions: | ||
- request: | ||
body: '{"max_tokens": 1024, "messages": [{"role": "user", "content": "hi! I''m | ||
bob"}, {"role": "assistant", "content": "It''s nice to meet you Bob! I''m an | ||
AI assistant created by Anthropic. I''m here to help with any questions or tasks | ||
you might have. How can I assist you today?"}, {"role": "user", "content": "what''s | ||
my name?"}, {"role": "assistant", "content": "I''m afraid I don''t actually | ||
know your name. As an AI assistant, I don''t have any prior knowledge about | ||
you or your identity. I only know what you''ve directly told me, which is that | ||
your name is Bob. Please let me know if you have any other questions I can help | ||
with!"}, {"role": "user", "content": "i like the celtics!"}, {"role": "assistant", | ||
"content": "Okay, got it! You like the Boston Celtics basketball team. That''s | ||
great, the Celtics have a rich history and are a popular NBA franchise. Do you | ||
follow them closely? What do you like most about the team? I''m always happy | ||
to discuss sports and learn more about the interests of the people I talk to."}, | ||
{"role": "user", "content": "i like how much they win"}], "model": "claude-3-haiku-20240307"}' | ||
headers: | ||
accept: | ||
- application/json | ||
accept-encoding: | ||
- gzip, deflate | ||
anthropic-version: | ||
- '2023-06-01' | ||
connection: | ||
- keep-alive | ||
content-length: | ||
- '1106' | ||
content-type: | ||
- application/json | ||
host: | ||
- api.anthropic.com | ||
user-agent: | ||
- Anthropic/Python 0.34.2 | ||
x-stainless-arch: | ||
- arm64 | ||
x-stainless-async: | ||
- 'false' | ||
x-stainless-lang: | ||
- python | ||
x-stainless-os: | ||
- MacOS | ||
x-stainless-package-version: | ||
- 0.34.2 | ||
x-stainless-runtime: | ||
- CPython | ||
x-stainless-runtime-version: | ||
- 3.11.9 | ||
method: POST | ||
uri: https://api.anthropic.com/v1/messages | ||
response: | ||
body: | ||
string: !!binary | | ||
H4sIAAAAAAAAA0yST2sbQQzFv4qYSy+bYOcPIXspTQOlFApN20NbilFmZa/iWWkz0tjZhHz3MuuE | ||
5jQgPT3p95inwF1ow2Cb1WJ5dvb47Zzub+7kcXl68/sL/7z+9ek8NMGnkaqKzHBDoQlZUy2gGZuj | ||
eGjCoB2l0IaYsHR0dHrUI2/L0cni5GxxurgITYgqTuKh/fP06uj0UGfnpw2fIaJAkY5yde3AewIc | ||
R8IEuoas6iwbWGsGBCsxktm6JHDCARJvaR64UnMV+EjJOdox/Ohpgh53BLdEAipUzapyUHPodGBB | ||
cVhnlNizkQELfL36AD2ba54a2LP3sLyA2OMwsor1PIKzJ7Jj+E5Ur5q0ZFjjTjM7HU6KKjUfEk8T | ||
7FmgMkUdRnKaKd762Qx/S7CjPIGhs62naowGCGuUmeQV6131k9r2jB0768HdijmyUAf0ECklkkig | ||
O8ozcEcRu8pnEClXYZpgxOw1kX2PDgNuyap2qAH3gAIcVTgeiFhmn0S4KXQM11qxD+Hif/iXG2FM | ||
OFEGzWCEpjKL6zqOJWFOE5Dc6UQd7NFjP9PUzXuW9+H5bxPMdVzleTa0gaRbeckSXhpG96XyhVZK | ||
Sk0o8+dsnwLLWHzluiWx0J4sL5ugxd/WlovL5+d/AAAA//8DACqDJpL8AgAA | ||
headers: | ||
CF-Cache-Status: | ||
- DYNAMIC | ||
CF-RAY: | ||
- 8c225edaaa368c3b-EWR | ||
Connection: | ||
- keep-alive | ||
Content-Encoding: | ||
- gzip | ||
Content-Type: | ||
- application/json | ||
Date: | ||
- Thu, 12 Sep 2024 19:40:40 GMT | ||
Server: | ||
- cloudflare | ||
Transfer-Encoding: | ||
- chunked | ||
X-Robots-Tag: | ||
- none | ||
request-id: | ||
- req_01VHhQUqymq7SujsCGRAw7pE | ||
via: | ||
- 1.1 google | ||
x-cloud-trace-context: | ||
- 587824031142d6a2dfae0345c152f033 | ||
status: | ||
code: 200 | ||
message: OK | ||
- request: | ||
body: '{"max_tokens": 1024, "messages": [{"role": "user", "content": "hi! I''m | ||
bob"}, {"role": "assistant", "content": "It''s nice to meet you Bob! I''m an | ||
AI assistant created by Anthropic. I''m here to help with any questions or tasks | ||
you might have. How can I assist you today?"}, {"role": "user", "content": "what''s | ||
my name?"}, {"role": "assistant", "content": "I''m afraid I don''t actually | ||
know your name. As an AI assistant, I don''t have any prior knowledge about | ||
you or your identity. I only know what you''ve directly told me, which is that | ||
your name is Bob. Please let me know if you have any other questions I can help | ||
with!"}, {"role": "user", "content": "i like the celtics!"}, {"role": "assistant", | ||
"content": "Okay, got it! You like the Boston Celtics basketball team. That''s | ||
great, the Celtics have a rich history and are a popular NBA franchise. Do you | ||
follow them closely? What do you like most about the team? I''m always happy | ||
to discuss sports and learn more about the interests of the people I talk to."}, | ||
{"role": "user", "content": "i like how much they win"}, {"role": "assistant", | ||
"content": "I can understand the appeal of rooting for a successful team like | ||
the Boston Celtics. They have been one of the most dominant franchises in NBA | ||
history, with 17 championship titles. Seeing your favorite team consistently | ||
win and compete for championships can be very satisfying as a fan. The Celtics'' | ||
winning tradition and sustained excellence over the decades is certainly part | ||
of what makes them such an iconic team in the league. Do you have a favorite | ||
Celtics player or season you particularly enjoyed watching them win?"}, {"role": | ||
"user", "content": "Create a summary of the conversation above:"}], "model": | ||
"claude-3-haiku-20240307"}' | ||
headers: | ||
accept: | ||
- application/json | ||
accept-encoding: | ||
- gzip, deflate | ||
anthropic-version: | ||
- '2023-06-01' | ||
connection: | ||
- keep-alive | ||
content-length: | ||
- '1744' | ||
content-type: | ||
- application/json | ||
host: | ||
- api.anthropic.com | ||
user-agent: | ||
- Anthropic/Python 0.34.2 | ||
x-stainless-arch: | ||
- arm64 | ||
x-stainless-async: | ||
- 'false' | ||
x-stainless-lang: | ||
- python | ||
x-stainless-os: | ||
- MacOS | ||
x-stainless-package-version: | ||
- 0.34.2 | ||
x-stainless-runtime: | ||
- CPython | ||
x-stainless-runtime-version: | ||
- 3.11.9 | ||
method: POST | ||
uri: https://api.anthropic.com/v1/messages | ||
response: | ||
body: | ||
string: !!binary | | ||
H4sIAAAAAAAAA2xT22ocMQz9FeGXvEyWTbbQdt6SUmhaKLQUSm8Era1du+uRppaczTTk34tnk+2F | ||
PtlY0tE5R/KdS8H1btDt9fJs9fLNu5+v+PL1h58/zuk9337Wl89XrnM2jdSySBW35DpXJLcHVE1q | ||
yOY6N0ig7HrnM9ZAp6vTiGlXT8+X50+Wq+VT1zkvbMTm+i93j4hGt612Pnr3igpBUkDQOgxYJpAN | ||
WCTwwjdUFC0JgwpssPRf+St/+De2pi0y7JNFGAgSW5FQfeItDJNS3gAqIMPFFRypgy+ERgHWE1yw | ||
xSJj8gv4JLW1ZjDJoYFNUgswDjPDS1kvGoGPLWOSCqg7CrCPaH8ldnAFPmNJm0QBrIWvIKTAJwYR | ||
bwiQJxhLkgI7ln2msCXAtdQZpgPkAMJ5gh3T/ggPEQOEVMhbnh4JdrCPyUfYox4a/ZfwURbdjoVU | ||
H1k11Jx2NNt9KWrC8IKyJa+wbuJsjTmDEQ4HVq1AMYXflVH2MFQfG8QE+8QLuAL0R11hxn4APYHS | ||
yMakJmWaEbWqYWJqN+9JtQMWa7OzSKnA2VN4e3kBPuIwJmGNadQDl4P5aXP0BmGDN1KSHfvBmHGi | ||
AlJACVUOUyMdySfMeQLi7zJRWDyuldxQaYr/Wq+IChvxtdkmDFuyAz+ZpweEPoJYpNIddnA2KWJp | ||
SfM4EhsVUoPEf7pxkDEQFNJROLT89QQhqa+qDxbM5p8ooPcyjDlpHIhN59JMW/TTwt1/65yajNdl | ||
Ful6RxyurRZ2DwGlH5XYk+u55ty5Ov/o/s4lHqtdm+yI1fWr1fPOSbU/386eLe/vfwEAAP//AwCu | ||
w+suMQQAAA== | ||
headers: | ||
CF-Cache-Status: | ||
- DYNAMIC | ||
CF-RAY: | ||
- 8c225ee28b878c3b-EWR | ||
Connection: | ||
- keep-alive | ||
Content-Encoding: | ||
- gzip | ||
Content-Type: | ||
- application/json | ||
Date: | ||
- Thu, 12 Sep 2024 19:40:42 GMT | ||
Server: | ||
- cloudflare | ||
Transfer-Encoding: | ||
- chunked | ||
X-Robots-Tag: | ||
- none | ||
request-id: | ||
- req_01Hn7UtUhHZJSyz1gqxAMBfc | ||
via: | ||
- 1.1 google | ||
x-cloud-trace-context: | ||
- 3dc4bb1338ba2e9b0e95217e8f3a6c9d | ||
status: | ||
code: 200 | ||
message: OK | ||
version: 1 |
Oops, something went wrong.