mirror of
https://github.com/hwchase17/langchain.git
synced 2025-12-05 12:16:01 +00:00
docs: run how-to guides in CI (#27615)
Add how-to guides to the [Run notebooks job](https://github.com/langchain-ai/langchain/actions/workflows/run_notebooks.yml) and fix existing notebooks.
- As with tutorials, cassettes must be updated when HTTP calls in guides change (by running the existing [script](https://github.com/langchain-ai/langchain/blob/master/docs/scripts/update_cassettes.sh)).
- Cassettes now total ~62 MB over 474 files.
- `docs/scripts/prepare_notebooks_for_ci.py` lists a number of notebooks that do not run (e.g., because they require additional infrastructure, are slow, or require `input()`).
This commit is contained in:
14
docs/scripts/cache_data.py
Normal file
14
docs/scripts/cache_data.py
Normal file
@@ -0,0 +1,14 @@
|
||||
import tiktoken
|
||||
from unstructured.nlp.tokenize import download_nltk_packages
|
||||
|
||||
|
||||
def download_tiktoken_data():
    """Look up the tiktoken encoding for each model listed below.

    Each lookup triggers the download and caching of the files that
    encoding needs; the returned encoding objects are discarded.
    """
    model_names = ("gpt2", "gpt-3.5-turbo", "gpt-4o-mini")
    for model_name in model_names:
        tiktoken.encoding_for_model(model_name)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run each download step in order, tiktoken data
    # first, then the NLTK packages helper imported from unstructured.
    for run_download in (download_tiktoken_data, download_nltk_packages):
        run_download()
|
||||
Reference in New Issue
Block a user