diff --git a/Makefile b/Makefile
index 09447ae7668..9e411a56bde 100644
--- a/Makefile
+++ b/Makefile
@@ -17,16 +17,11 @@ clean: docs_clean api_docs_clean
 
 ## docs_build: Build the documentation.
 docs_build:
-	docs/.local_build.sh
+	cd docs && make build
 
 ## docs_clean: Clean the documentation build artifacts.
 docs_clean:
-	@if [ -d _dist ]; then \
-		rm -r _dist; \
-		echo "Directory _dist has been cleaned."; \
-	else \
-		echo "Nothing to clean."; \
-	fi
+	cd docs && make clean
 
 ## docs_linkcheck: Run linkchecker on the documentation.
 docs_linkcheck:
diff --git a/docs/.gitignore b/docs/.gitignore
index 505adfc3ded..c5870c13e3a 100644
--- a/docs/.gitignore
+++ b/docs/.gitignore
@@ -1,2 +1,3 @@
 /.quarto/
 src/supabase.d.ts
+build
\ No newline at end of file
diff --git a/docs/.local_build.sh b/docs/.local_build.sh
deleted file mode 100755
index cde92e265f6..00000000000
--- a/docs/.local_build.sh
+++ /dev/null
@@ -1,27 +0,0 @@
-#!/usr/bin/env bash
-
-set -o errexit
-set -o nounset
-set -o pipefail
-set -o xtrace
-
-SCRIPT_DIR="$(cd "$(dirname "$0")"; pwd)"
-cd "${SCRIPT_DIR}"
-
-mkdir -p ../_dist
-rsync -ruv --exclude node_modules --exclude api_reference --exclude .venv --exclude .docusaurus . ../_dist
-cd ../_dist
-poetry run python scripts/model_feat_table.py
-cp ../cookbook/README.md src/pages/cookbook.mdx
-mkdir -p docs/templates
-cp ../templates/docs/INDEX.md docs/templates/index.md
-poetry run python scripts/copy_templates.py
-wget -q https://raw.githubusercontent.com/langchain-ai/langserve/main/README.md -O docs/langserve.md
-wget -q https://raw.githubusercontent.com/langchain-ai/langgraph/main/README.md -O docs/langgraph.md
-
-
-poetry run quarto render docs
-poetry run python scripts/generate_api_reference_links.py --docs_dir docs
-
-yarn
-yarn start
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 00000000000..72b02adab4d
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,80 @@
+# we build the docs in these stages:
+# 1. install quarto and python dependencies
+# 2. copy files from "source dir" to "intermediate dir"
+# 3. generate files like model feat table, etc in "intermediate dir"
+# 4. copy files to their right spots (e.g. langserve readme) in "intermediate dir"
+# 5. build the docs from "intermediate dir" to "output dir"
+
+SOURCE_DIR = docs/
+INTERMEDIATE_DIR = build/intermediate/docs
+OUTPUT_DIR = build/output
+OUTPUT_DOCS_DIR = $(OUTPUT_DIR)/docs
+
+PYTHON = .venv/bin/python
+
+QUARTO_CMD ?= quarto
+
+PARTNER_DEPS_LIST := $(shell ls -1 ../libs/partners | grep -vE "airbyte|ibm" | xargs -I {} echo "../libs/partners/{}" | tr '\n' ' ')
+
+PORT ?= 3001
+
+clean:
+	rm -rf build
+
+install-vercel-deps:
+	yum -y update
+	yum install gcc bzip2-devel libffi-devel zlib-devel wget tar gzip rsync -y
+
+	wget -q https://github.com/quarto-dev/quarto-cli/releases/download/v1.3.450/quarto-1.3.450-linux-amd64.tar.gz
+	tar -xzf quarto-1.3.450-linux-amd64.tar.gz
+
+install-py-deps:
+	python3 -m venv .venv
+	$(PYTHON) -m pip install --upgrade pip
+	$(PYTHON) -m pip install --upgrade uv
+	$(PYTHON) -m uv pip install -r vercel_requirements.txt
+	$(PYTHON) -m uv pip install --editable $(PARTNER_DEPS_LIST)
+
+generate-files:
+	mkdir -p $(INTERMEDIATE_DIR)
+	cp -r $(SOURCE_DIR)/* $(INTERMEDIATE_DIR)
+	mkdir -p $(INTERMEDIATE_DIR)/templates
+	cp ../templates/docs/INDEX.md $(INTERMEDIATE_DIR)/templates/index.md
+	cp ../cookbook/README.md $(INTERMEDIATE_DIR)/cookbook.mdx
+
+	$(PYTHON) scripts/model_feat_table.py $(INTERMEDIATE_DIR)
+
+	$(PYTHON) scripts/copy_templates.py $(INTERMEDIATE_DIR)
+
+	wget -q https://raw.githubusercontent.com/langchain-ai/langserve/main/README.md -O $(INTERMEDIATE_DIR)/langserve.md
+	$(PYTHON) scripts/resolve_local_links.py $(INTERMEDIATE_DIR)/langserve.md https://github.com/langchain-ai/langserve/tree/main/
+
+	wget -q https://raw.githubusercontent.com/langchain-ai/langgraph/main/README.md -O $(INTERMEDIATE_DIR)/langgraph.md
+	$(PYTHON) scripts/resolve_local_links.py $(INTERMEDIATE_DIR)/langgraph.md https://github.com/langchain-ai/langgraph/tree/main/
+
+	$(PYTHON) scripts/generate_api_reference_links.py --docs_dir $(INTERMEDIATE_DIR)
+
+copy-infra:
+	mkdir -p $(OUTPUT_DIR)
+	cp -r src $(OUTPUT_DIR)
+	cp vercel.json $(OUTPUT_DIR)
+	cp babel.config.js $(OUTPUT_DIR)
+	cp -r data $(OUTPUT_DIR)
+	cp docusaurus.config.js $(OUTPUT_DIR)
+	cp package.json $(OUTPUT_DIR)
+	cp sidebars.js $(OUTPUT_DIR)
+	cp -r static $(OUTPUT_DIR)
+	cp yarn.lock $(OUTPUT_DIR)
+
+quarto-render:
+	$(QUARTO_CMD) render $(INTERMEDIATE_DIR) --output-dir $(OUTPUT_DOCS_DIR) --no-execute
+	mv $(OUTPUT_DOCS_DIR)/$(INTERMEDIATE_DIR)/* $(OUTPUT_DOCS_DIR)
+	rm -rf $(OUTPUT_DOCS_DIR)/build
+
+md-sync:
+	rsync -avm --include="*/" --include="*.mdx" --include="*.md" --exclude="*" $(INTERMEDIATE_DIR)/ $(OUTPUT_DOCS_DIR)
+
+build: install-py-deps generate-files copy-infra quarto-render md-sync
+
+start:
+	cd $(OUTPUT_DIR) && yarn && yarn start --port=$(PORT)
diff --git a/docs/code-block-loader.js b/docs/code-block-loader.js
deleted file mode 100644
index 044e4552dc8..00000000000
--- a/docs/code-block-loader.js
+++ /dev/null
@@ -1,76 +0,0 @@
-/* eslint-disable prefer-template */
-/* eslint-disable no-param-reassign */
-// eslint-disable-next-line import/no-extraneous-dependencies
-const babel = require("@babel/core");
-const path = require("path");
-const fs = require("fs");
-
-/**
- *
- * @param {string|Buffer} content Content of the resource file
- * @param {object} [map] SourceMap data consumable by https://github.com/mozilla/source-map
- * @param {any} [meta] Meta data, could be anything
- */
-async function webpackLoader(content, map, meta) {
-  const cb = this.async();
-
-  if (!this.resourcePath.endsWith(".ts")) {
-    cb(null, JSON.stringify({ content, imports: [] }), map, meta);
-    return;
-  }
-
-  try {
-    const result = await babel.parseAsync(content, {
-      sourceType: "module",
-      filename: this.resourcePath,
-    });
-
-    const imports = [];
-
-    result.program.body.forEach((node) => {
-      if (node.type === "ImportDeclaration") {
-        const source = node.source.value;
-
-        if (!source.startsWith("langchain")) {
-          return;
-        }
-
-        node.specifiers.forEach((specifier) => {
-          if (specifier.type === "ImportSpecifier") {
-            const local = specifier.local.name;
-            const imported = specifier.imported.name;
-            imports.push({ local, imported, source });
-          } else {
-            throw new Error("Unsupported import type");
-          }
-        });
-      }
-    });
-
-    imports.forEach((imp) => {
-      const { imported, source } = imp;
-      const moduleName = source.split("/").slice(1).join("_");
-      const docsPath = path.resolve(__dirname, "docs", "api", moduleName);
-      const available = fs.readdirSync(docsPath, { withFileTypes: true });
-      const found = available.find(
-        (dirent) =>
-          dirent.isDirectory() &&
-          fs.existsSync(path.resolve(docsPath, dirent.name, imported + ".md"))
-      );
-      if (found) {
-        imp.docs =
-          "/" + path.join("docs", "api", moduleName, found.name, imported);
-      } else {
-        throw new Error(
-          `Could not find docs for ${source}.${imported} in docs/api/`
-        );
-      }
-    });
-
-    cb(null, JSON.stringify({ content, imports }), map, meta);
-  } catch (err) {
-    cb(err);
-  }
-}
-
-module.exports = webpackLoader;
diff --git a/docs/docs/integrations/llms/huggingface_pipelines.ipynb b/docs/docs/integrations/llms/huggingface_pipelines.ipynb
index c47beae6642..28f7caa104f 100644
--- a/docs/docs/integrations/llms/huggingface_pipelines.ipynb
+++ b/docs/docs/integrations/llms/huggingface_pipelines.ipynb
@@ -330,7 +330,7 @@
    "id": "da9a9239",
    "metadata": {},
    "source": [
-    "For more information refer to [OpenVINO LLM guide](https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html) and [OpenVINO Local Pipelines notebook](./openvino.ipynb)."
+    "For more information refer to [OpenVINO LLM guide](https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html) and [OpenVINO Local Pipelines notebook](/docs/integrations/llms/openvino/)."
   ]
  },
 ],
diff --git a/docs/docs/integrations/providers/intel.mdx b/docs/docs/integrations/providers/intel.mdx
index 888d9ba3cb7..9429d986c07 100644
--- a/docs/docs/integrations/providers/intel.mdx
+++ b/docs/docs/integrations/providers/intel.mdx
@@ -67,7 +67,7 @@ from langchain_community.embeddings import QuantizedBgeEmbeddings
 
 ### Weight-Only Quantization with ITREX
 
-See a [usage example](../docs/integrations/llms/weight_only_quantization.ipynb).
+See a [usage example](/docs/integrations/llms/weight_only_quantization).
 
 ## Detail of Configuration Parameters
 
diff --git a/docs/scripts/copy_templates.py b/docs/scripts/copy_templates.py
index b397c6d1d74..403a54693d6 100644
--- a/docs/scripts/copy_templates.py
+++ b/docs/scripts/copy_templates.py
@@ -2,35 +2,44 @@ import glob
 import os
 import re
 import shutil
+import sys
 from pathlib import Path
 
-TEMPLATES_DIR = Path(os.path.abspath(__file__)).parents[2] / "templates"
-DOCS_TEMPLATES_DIR = Path(os.path.abspath(__file__)).parents[1] / "docs" / "templates"
+if __name__ == "__main__":
+    intermediate_dir = Path(sys.argv[1])
+    templates_source_dir = Path(os.path.abspath(__file__)).parents[2] / "templates"
+    templates_intermediate_dir = intermediate_dir / "templates"
 
-readmes = list(glob.glob(str(TEMPLATES_DIR) + "/*/README.md"))
-destinations = [readme[len(str(TEMPLATES_DIR)) + 1 : -10] + ".md" for readme in readmes]
-for source, destination in zip(readmes, destinations):
-    full_destination = DOCS_TEMPLATES_DIR / destination
-    shutil.copyfile(source, full_destination)
-    with open(full_destination, "r") as f:
-        content = f.read()
-    # remove images
-    content = re.sub("\!\[.*?\]\((.*?)\)", "", content)
-    with open(full_destination, "w") as f:
-        f.write(content)
+    readmes = list(glob.glob(str(templates_source_dir) + "/*/README.md"))
+    destinations = [
+        readme[len(str(templates_source_dir)) + 1 : -10] + ".md" for readme in readmes
+    ]
+    for source, destination in zip(readmes, destinations):
+        full_destination = templates_intermediate_dir / destination
+        shutil.copyfile(source, full_destination)
+        with open(full_destination, "r") as f:
+            content = f.read()
+        # remove images
+        content = re.sub("\!\[.*?\]\((.*?)\)", "", content)
+        with open(full_destination, "w") as f:
+            f.write(content)
 
-sidebar_hidden = """---
+    sidebar_hidden = """---
 sidebar_class_name: hidden
 ---
 
 """
 
-TEMPLATES_INDEX_DESTINATION = DOCS_TEMPLATES_DIR / "index.md"
-with open(TEMPLATES_INDEX_DESTINATION, "r") as f:
-    content = f.read()
-# replace relative links
-content = re.sub("\]\(\.\.\/", "](/docs/templates/", content)
+    # handle index file
+    templates_index_source = templates_source_dir / "docs" / "INDEX.md"
+    templates_index_intermediate = templates_intermediate_dir / "index.md"
 
-with open(TEMPLATES_INDEX_DESTINATION, "w") as f:
-    f.write(sidebar_hidden + content)
+    with open(templates_index_source, "r") as f:
+        content = f.read()
+
+    # replace relative links
+    content = re.sub("\]\(\.\.\/", "](/docs/templates/", content)
+
+    with open(templates_index_intermediate, "w") as f:
+        f.write(sidebar_hidden + content)
diff --git a/docs/scripts/generate_api_reference_links.py b/docs/scripts/generate_api_reference_links.py
index 77c32a47327..9838303faaa 100644
--- a/docs/scripts/generate_api_reference_links.py
+++ b/docs/scripts/generate_api_reference_links.py
@@ -25,7 +25,6 @@ _IMPORT_RE = re.compile(
 _CURRENT_PATH = Path(__file__).parent.absolute()
 # Directory where generated markdown files are stored
 _DOCS_DIR = _CURRENT_PATH / "docs"
-_JSON_PATH = _CURRENT_PATH / "api_reference" / "guide_imports.json"
 
 
 def find_files(path):
@@ -55,6 +54,12 @@ def get_args():
         default=_DOCS_DIR,
         help="Directory where generated markdown files are stored",
     )
+    parser.add_argument(
+        "--json_path",
+        type=str,
+        default=None,
+        help="Path to store the generated JSON file",
+    )
     return parser.parse_args()
 
 
@@ -83,9 +88,11 @@ def main():
         global_imports[class_name][doc_title] = doc_url
 
     # Write the global imports information to a JSON file
-    _JSON_PATH.parent.mkdir(parents=True, exist_ok=True)
-    with _JSON_PATH.open("w") as f:
-        json.dump(global_imports, f)
+    if args.json_path:
+        json_path = Path(args.json_path)
+        json_path.parent.mkdir(parents=True, exist_ok=True)
+        with json_path.open("w") as f:
+            json.dump(global_imports, f)
 
 
 def _get_doc_title(data: str, file_name: str) -> str:
diff --git a/docs/scripts/model_feat_table.py b/docs/scripts/model_feat_table.py
index e56fee80d0c..f8a59adba85 100644
--- a/docs/scripts/model_feat_table.py
+++ b/docs/scripts/model_feat_table.py
@@ -1,11 +1,11 @@
 import os
+import sys
 from pathlib import Path
 
 from langchain_community import chat_models, llms
 from langchain_core.language_models.chat_models import BaseChatModel, SimpleChatModel
 from langchain_core.language_models.llms import LLM, BaseLLM
 
-INTEGRATIONS_DIR = Path(os.path.abspath(__file__)).parents[1] / "docs" / "integrations"
 LLM_IGNORE = ("FakeListLLM", "OpenAIChat", "PromptLayerOpenAIChat")
 LLM_FEAT_TABLE_CORRECTION = {
     "TextGen": {"_astream": False, "_agenerate": False},
@@ -218,9 +218,17 @@ def get_chat_model_table() -> str:
 
 
 if __name__ == "__main__":
+    output_dir = Path(sys.argv[1])
+    output_integrations_dir = output_dir / "integrations"
+    output_integrations_dir_llms = output_integrations_dir / "llms"
+    output_integrations_dir_chat = output_integrations_dir / "chat"
+    output_integrations_dir_llms.mkdir(parents=True, exist_ok=True)
+    output_integrations_dir_chat.mkdir(parents=True, exist_ok=True)
+
     llm_page = LLM_TEMPLATE.format(table=get_llm_table())
-    with open(INTEGRATIONS_DIR / "llms" / "index.mdx", "w") as f:
+
+    with open(output_integrations_dir / "llms" / "index.mdx", "w") as f:
         f.write(llm_page)
 
     chat_model_page = CHAT_MODEL_TEMPLATE.format(table=get_chat_model_table())
-    with open(INTEGRATIONS_DIR / "chat" / "index.mdx", "w") as f:
+    with open(output_integrations_dir / "chat" / "index.mdx", "w") as f:
         f.write(chat_model_page)
diff --git a/docs/settings.ini b/docs/settings.ini
deleted file mode 100644
index c5f865754e2..00000000000
--- a/docs/settings.ini
+++ /dev/null
@@ -1,11 +0,0 @@
-[DEFAULT]
-nbs_path = .
-recursive = True
-tst_flags = notest
-user = hwchase17
-doc_host = https://python.langchain.com
-doc_baseurl = /docs
-module_baseurls = metaflow=https://github.com/Netflix/metaflow/tree/master/
-                  fastcore=https://github.com/fastcore/tree/master
-host = github
-
diff --git a/docs/vercel_build.sh b/docs/vercel_build.sh
index f570319b46d..bfd9a3ed06f 100755
--- a/docs/vercel_build.sh
+++ b/docs/vercel_build.sh
@@ -2,39 +2,9 @@
 
 set -e
 
-yum -y update
-yum install gcc bzip2-devel libffi-devel zlib-devel wget tar gzip -y
+make install-vercel-deps
 
-# install quarto
-wget -q https://github.com/quarto-dev/quarto-cli/releases/download/v1.3.450/quarto-1.3.450-linux-amd64.tar.gz
-tar -xzf quarto-1.3.450-linux-amd64.tar.gz
-export PATH=$PATH:$(pwd)/quarto-1.3.450/bin/
+QUARTO_CMD="./quarto-1.3.450/bin/quarto" make build
 
-# setup python env
-python3 -m venv .venv
-source .venv/bin/activate
-python3 -m pip install --upgrade pip
-python3 -m pip install --upgrade uv
-python3 -m uv pip install -r vercel_requirements.txt
-python3 -m uv pip install -e $(ls ../libs/partners | grep -vE "airbyte|ibm|.md" | xargs -I {} echo "../libs/partners/{}")
-
-# autogenerate integrations tables
-python3 scripts/model_feat_table.py
-
-# copy in external files
-mkdir docs/templates
-cp ../templates/docs/INDEX.md docs/templates/index.md
-python3 scripts/copy_templates.py
-
-cp ../cookbook/README.md src/pages/cookbook.mdx
-
-wget -q https://raw.githubusercontent.com/langchain-ai/langserve/main/README.md -O docs/langserve.md
-python3 scripts/resolve_local_links.py docs/langserve.md https://github.com/langchain-ai/langserve/tree/main/
-
-wget -q https://raw.githubusercontent.com/langchain-ai/langgraph/main/README.md -O docs/langgraph.md
-python3 scripts/resolve_local_links.py docs/langgraph.md https://github.com/langchain-ai/langgraph/tree/main/
-
-# render
-quarto render docs/
-python3 scripts/generate_api_reference_links.py --docs_dir docs
\ No newline at end of file
+rm -rf docs
+mv build/output/docs ./
\ No newline at end of file
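
Note for reviewers: a minimal sketch of how the retooled build is meant to be driven locally (assuming `quarto` and `yarn` are on PATH; every target and variable below is one defined in the new docs/Makefile in this diff):

    # From the repository root: the top-level targets now delegate to docs/Makefile.
    make docs_build        # runs `cd docs && make build`
    make docs_clean        # runs `cd docs && make clean`

    # Or from docs/ directly:
    cd docs
    make build             # install-py-deps generate-files copy-infra quarto-render md-sync
    make start             # cd build/output && yarn && yarn start --port=3001 (override with PORT=...)

    # Vercel builds install a vendored quarto and point QUARTO_CMD at it instead of PATH:
    make install-vercel-deps
    QUARTO_CMD="./quarto-1.3.450/bin/quarto" make build

Because `quarto-render` writes into `build/output` and `md-sync` then rsyncs the raw `.md`/`.mdx` sources from `build/intermediate/docs` over it, all artifacts live under `build/`, so `make clean` (a plain `rm -rf build`) fully resets the tree.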