monorepo: add script for updating notebook cassettes (#27399)

1. Move dependencies for running notebooks into monorepo poetry test
deps;
2. Add script to update cassettes for a single notebook;
3. Add cassettes for some how-to guides.

---

To update cassettes for a single notebook, run
`docs/scripts/update_cassettes.sh`. For example:
```
./docs/scripts/update_cassettes.sh docs/docs/how_to/binding.ipynb
```
Requires:
1. monorepo dev and test dependencies installed;
2. env vars required by notebook are set.

Note: How-to guides are not currently run in the [scheduled
job](https://github.com/langchain-ai/langchain/actions/workflows/run_notebooks.yml).
Cassettes for more how-to guides will be added in subsequent PRs before
the guides are added to the scheduled job.
This commit is contained in:
ccurme
2024-10-16 13:46:49 -04:00
committed by GitHub
parent 88d71f6986
commit fdb7f951c8
23 changed files with 2805 additions and 42 deletions

View File

@@ -122,7 +122,10 @@ def add_vcr_to_notebook(
return notebook
def process_notebooks(should_comment_install_cells: bool) -> None:
def process_notebooks(
should_comment_install_cells: bool,
working_directory: str,
) -> None:
for directory in NOTEBOOK_DIRS:
for root, _, files in os.walk(directory):
for file in files:
@@ -130,6 +133,12 @@ def process_notebooks(should_comment_install_cells: bool) -> None:
continue
notebook_path = os.path.join(root, file)
# Filter notebooks based on the working_directory input
if working_directory != "all" and not notebook_path.startswith(
working_directory
):
continue
try:
notebook = nbformat.read(notebook_path, as_version=4)
@@ -172,8 +181,16 @@ def process_notebooks(should_comment_install_cells: bool) -> None:
default=False,
help="Whether to comment out install cells",
)
def main(comment_install_cells):
process_notebooks(should_comment_install_cells=comment_install_cells)
@click.option(
"--working-directory",
default="all",
help="Working directory or specific notebook to process",
)
def main(comment_install_cells, working_directory):
process_notebooks(
should_comment_install_cells=comment_install_cells,
working_directory=working_directory,
)
logger.info("All notebooks processed successfully.")

View File

@@ -0,0 +1,33 @@
#!/bin/bash
# Update the notebook cassettes stored in docs/cassettes.
#
# Usage (run from the repository root; every path in this script is relative):
#   ./docs/scripts/update_cassettes.sh                                 # all notebooks
#   ./docs/scripts/update_cassettes.sh docs/docs/how_to/binding.ipynb  # one notebook
#
# Requires the monorepo dev and test dependencies to be installed and any
# environment variables needed by the notebook(s) to be set.

# Stop at the first failing step (and on unset variables or failed pipeline
# stages) so that a failed download or preparation step never falls through
# to executing the notebooks.
set -euo pipefail

# Get the working directory from the input argument, default to 'all' if not provided
WORKING_DIRECTORY=${1:-all}
# Delete recorded cassettes from docs/cassettes.
#
# Arguments:
#   $1 - "all" to delete every cassette, otherwise a path whose basename
#        (with any .ipynb suffix stripped) is used as the cassette prefix.
#
# Paths are relative, so this must run from the repository root.
delete_cassettes() {
    local dir=$1
    if [ "$dir" = "all" ]; then
        echo "Deleting all cassettes..."
        rm -f docs/cassettes/*.msgpack.zlib
    else
        # Extract the filename from the path. Declared separately from the
        # assignment so that a failing basename is not masked by `local`.
        local filename
        filename=$(basename "$dir" .ipynb)
        echo "Deleting cassettes for $filename..."
        # Quote the prefix so names containing whitespace or glob characters
        # are matched literally; only the trailing pattern is left to glob.
        rm -f -- "docs/cassettes/${filename}"_*.msgpack.zlib
    fi
}
# Step 1: remove the cassettes that are about to be re-recorded.
delete_cassettes "$WORKING_DIRECTORY"

# Step 2: pre-download the tiktoken files before any notebook runs.
printf '%s\n' "Pre-downloading tiktoken files..."
poetry run python docs/scripts/download_tiktoken.py

# Step 3: prepare the selected notebook(s), commenting out their install cells.
printf '%s\n' "Preparing notebooks for CI..."
poetry run python docs/scripts/prepare_notebooks_for_ci.py \
    --comment-install-cells \
    --working-directory "$WORKING_DIRECTORY"

# Step 4: execute the prepared notebook(s).
printf '%s\n' "Running notebooks..."
./docs/scripts/execute_notebooks.sh "$WORKING_DIRECTORY"