docs: run how-to guides in CI (#27615)

Add how-to guides to the [Run notebooks
job](https://github.com/langchain-ai/langchain/actions/workflows/run_notebooks.yml)
and fix existing notebooks.

- As with tutorials, cassettes must be updated when HTTP calls in guides
change (by running the existing
[script](https://github.com/langchain-ai/langchain/blob/master/docs/scripts/update_cassettes.sh)).
- Cassettes now total ~62 MB over 474 files.
- `docs/scripts/prepare_notebooks_for_ci.py` lists a number of notebooks
that do not run (e.g., because they require additional infra, are slow,
or require `input()`).
This commit is contained in:
ccurme
2024-10-30 12:35:38 -04:00
committed by GitHub
parent 88bfd60b03
commit 595dc592c9
420 changed files with 2333 additions and 321 deletions

View File

@@ -9,9 +9,11 @@ WORKING_DIRECTORY=$1
# Function to execute a single notebook
execute_notebook() {
file="$1"
echo "Starting execution of $file"
index="$2"
total="$3"
echo "Starting execution of $file ($index/$total)"
start_time=$(date +%s)
if ! output=$(time poetry run jupyter nbconvert --to notebook --execute $file 2>&1); then
if ! output=$(time poetry run jupyter nbconvert --to notebook --execute --ExecutePreprocessor.kernel_name=python3 $file 2>&1); then
end_time=$(date +%s)
execution_time=$((end_time - start_time))
echo "Error in $file. Execution time: $execution_time seconds"
@@ -27,12 +29,18 @@ export -f execute_notebook
# Determine the list of notebooks to execute
if [ "$WORKING_DIRECTORY" == "all" ]; then
notebooks=$(find docs/docs/tutorials -name "*.ipynb" | grep -v ".ipynb_checkpoints" | grep -vFf <(echo "$SKIP_NOTEBOOKS"))
notebooks=$(find docs/docs/tutorials docs/docs/how_to -name "*.ipynb" | grep -v ".ipynb_checkpoints" | grep -vFf <(echo "$SKIP_NOTEBOOKS"))
else
notebooks=$(find "$WORKING_DIRECTORY" -name "*.ipynb" | grep -v ".ipynb_checkpoints" | grep -vFf <(echo "$SKIP_NOTEBOOKS"))
fi
# Execute notebooks sequentially
for file in $notebooks; do
execute_notebook "$file"
# Convert the list of notebooks to an array
notebooks_array=($notebooks)
total_notebooks=${#notebooks_array[@]}
# Execute notebooks sequentially with progress indication
for i in "${!notebooks_array[@]}"; do
file="${notebooks_array[$i]}"
index=$((i + 1))
execute_notebook "$file" "$index" "$total_notebooks"
done