Mirror of https://github.com/hwchase17/langchain.git, synced 2025-05-04 06:37:58 +00:00
infra: cleanup docs build (#21134)
Refactors the docs build in order to:

- run the same `make build` command in both vercel and local builds
- incrementally build artifacts in 2 distinct steps, instead of building all docs in-place (in vercel) or in a `_dist` dir (locally)

Highlights:

- introduces `make build` as the single entry point for building the docs
- collects and generates all files for the build in `docs/build/intermediate`
- renders those Jupyter notebook + markdown files into `docs/build/outputs`

The outputs to host are now in `docs/build/outputs`, which will require a vercel settings change.

Todo:
- [ ] figure out how to point vercel at the right output directory (right now deleting and moving the docs dir in vercel_build.sh isn't great)
parent 6fa8626e2f
commit cd4c54282a
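For orientation, here is a minimal sketch of the new local flow end to end, using the targets from the `docs/Makefile` added below (assumes `python3`, `quarto`, and `yarn` are available):

```bash
# from the repo root: build the docs with the new two-step layout
cd docs
make build    # install-py-deps -> generate-files -> copy-infra -> quarto-render -> md-sync
              # collected sources land in build/intermediate/docs, rendered site in build/output
make start    # cd build/output && yarn && yarn start --port=3001 (PORT is overridable)
```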
Makefile (9 lines changed)

```diff
@@ -17,16 +17,11 @@ clean: docs_clean api_docs_clean

 ## docs_build: Build the documentation.
 docs_build:
-	docs/.local_build.sh
+	cd docs && make build-local

 ## docs_clean: Clean the documentation build artifacts.
 docs_clean:
-	@if [ -d _dist ]; then \
-		rm -r _dist; \
-		echo "Directory _dist has been cleaned."; \
-	else \
-		echo "Nothing to clean."; \
-	fi
+	cd docs && make clean

 ## docs_linkcheck: Run linkchecker on the documentation.
 docs_linkcheck:
```
docs/.gitignore (vendored, 1 line changed)

```diff
@@ -1,2 +1,3 @@
 /.quarto/
 src/supabase.d.ts
+build
```
docs/.local_build.sh (deleted; this is the script the old `docs_build` target invoked)

```diff
@@ -1,27 +0,0 @@
-#!/usr/bin/env bash
-
-set -o errexit
-set -o nounset
-set -o pipefail
-set -o xtrace
-
-SCRIPT_DIR="$(cd "$(dirname "$0")"; pwd)"
-cd "${SCRIPT_DIR}"
-
-mkdir -p ../_dist
-rsync -ruv --exclude node_modules --exclude api_reference --exclude .venv --exclude .docusaurus . ../_dist
-cd ../_dist
-poetry run python scripts/model_feat_table.py
-cp ../cookbook/README.md src/pages/cookbook.mdx
-mkdir -p docs/templates
-cp ../templates/docs/INDEX.md docs/templates/index.md
-poetry run python scripts/copy_templates.py
-wget -q https://raw.githubusercontent.com/langchain-ai/langserve/main/README.md -O docs/langserve.md
-wget -q https://raw.githubusercontent.com/langchain-ai/langgraph/main/README.md -O docs/langgraph.md
-
-
-poetry run quarto render docs
-poetry run python scripts/generate_api_reference_links.py --docs_dir docs
-
-yarn
-yarn start
```
docs/Makefile (new file, 80 lines)

```diff
@@ -0,0 +1,80 @@
+# we build the docs in these stages:
+# 1. install quarto and python dependencies
+# 2. copy files from "source dir" to "intermediate dir"
+# 3. generate files like model feat table, etc in "intermediate dir"
+# 4. copy files to their right spots (e.g. langserve readme) in "intermediate dir"
+# 5. build the docs from "intermediate dir" to "output dir"
+
+SOURCE_DIR = docs/
+INTERMEDIATE_DIR = build/intermediate/docs
+OUTPUT_DIR = build/output
+OUTPUT_DOCS_DIR = $(OUTPUT_DIR)/docs
+
+PYTHON = .venv/bin/python
+
+QUARTO_CMD ?= quarto
+
+PARTNER_DEPS_LIST := $(shell ls -1 ../libs/partners | grep -vE "airbyte|ibm" | xargs -I {} echo "../libs/partners/{}" | tr '\n' ' ')
+
+PORT ?= 3001
+
+clean:
+	rm -rf build
+
+install-vercel-deps:
+	yum -y update
+	yum install gcc bzip2-devel libffi-devel zlib-devel wget tar gzip rsync -y
+
+	wget -q https://github.com/quarto-dev/quarto-cli/releases/download/v1.3.450/quarto-1.3.450-linux-amd64.tar.gz
+	tar -xzf quarto-1.3.450-linux-amd64.tar.gz
+
+install-py-deps:
+	python3 -m venv .venv
+	$(PYTHON) -m pip install --upgrade pip
+	$(PYTHON) -m pip install --upgrade uv
+	$(PYTHON) -m uv pip install -r vercel_requirements.txt
+	$(PYTHON) -m uv pip install --editable $(PARTNER_DEPS_LIST)
+
+generate-files:
+	mkdir -p $(INTERMEDIATE_DIR)
+	cp -r $(SOURCE_DIR)/* $(INTERMEDIATE_DIR)
+	mkdir -p $(INTERMEDIATE_DIR)/templates
+	cp ../templates/docs/INDEX.md $(INTERMEDIATE_DIR)/templates/index.md
+	cp ../cookbook/README.md $(INTERMEDIATE_DIR)/cookbook.mdx
+
+	$(PYTHON) scripts/model_feat_table.py $(INTERMEDIATE_DIR)
+
+	$(PYTHON) scripts/copy_templates.py $(INTERMEDIATE_DIR)
+
+	wget -q https://raw.githubusercontent.com/langchain-ai/langserve/main/README.md -O $(INTERMEDIATE_DIR)/langserve.md
+	$(PYTHON) scripts/resolve_local_links.py $(INTERMEDIATE_DIR)/langserve.md https://github.com/langchain-ai/langserve/tree/main/
+
+	wget -q https://raw.githubusercontent.com/langchain-ai/langgraph/main/README.md -O $(INTERMEDIATE_DIR)/langgraph.md
+	$(PYTHON) scripts/resolve_local_links.py $(INTERMEDIATE_DIR)/langgraph.md https://github.com/langchain-ai/langgraph/tree/main/
+
+	$(PYTHON) scripts/generate_api_reference_links.py --docs_dir $(INTERMEDIATE_DIR)
+
+copy-infra:
+	mkdir -p $(OUTPUT_DIR)
+	cp -r src $(OUTPUT_DIR)
+	cp vercel.json $(OUTPUT_DIR)
+	cp babel.config.js $(OUTPUT_DIR)
+	cp -r data $(OUTPUT_DIR)
+	cp docusaurus.config.js $(OUTPUT_DIR)
+	cp package.json $(OUTPUT_DIR)
+	cp sidebars.js $(OUTPUT_DIR)
+	cp -r static $(OUTPUT_DIR)
+	cp yarn.lock $(OUTPUT_DIR)
+
+quarto-render:
+	$(QUARTO_CMD) render $(INTERMEDIATE_DIR) --output-dir $(OUTPUT_DOCS_DIR) --no-execute
+	mv $(OUTPUT_DOCS_DIR)/$(INTERMEDIATE_DIR)/* $(OUTPUT_DOCS_DIR)
+	rm -rf $(OUTPUT_DOCS_DIR)/build
+
+md-sync:
+	rsync -avm --include="*/" --include="*.mdx" --include="*.md" --exclude="*" $(INTERMEDIATE_DIR)/ $(OUTPUT_DOCS_DIR)
+
+build: install-py-deps generate-files copy-infra quarto-render md-sync
+
+start:
+	cd $(OUTPUT_DIR) && yarn && yarn start --port=$(PORT)
```
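The `md-sync` filter chain deserves a note: rsync evaluates `--include`/`--exclude` rules in order, so `--include="*/"` keeps directory traversal alive, the two markdown includes whitelist `.md`/`.mdx`, `--exclude="*"` drops everything else, and `-m` prunes directories the filter leaves empty. A throwaway demonstration with hypothetical paths:

```bash
# build a tiny tree with mixed file types
mkdir -p /tmp/mdsync-demo/src/guides && cd /tmp/mdsync-demo/src
touch guides/a.md guides/b.mdx guides/c.ipynb
# only a.md and b.mdx are copied; c.ipynb fails every include and hits --exclude="*"
rsync -avm --include="*/" --include="*.mdx" --include="*.md" --exclude="*" ./ ../out
```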
(deleted file — a webpack loader that parsed `langchain` imports out of TypeScript code examples; the file path is not preserved in this view)

```diff
@@ -1,76 +0,0 @@
-/* eslint-disable prefer-template */
-/* eslint-disable no-param-reassign */
-// eslint-disable-next-line import/no-extraneous-dependencies
-const babel = require("@babel/core");
-const path = require("path");
-const fs = require("fs");
-
-/**
- *
- * @param {string|Buffer} content Content of the resource file
- * @param {object} [map] SourceMap data consumable by https://github.com/mozilla/source-map
- * @param {any} [meta] Meta data, could be anything
- */
-async function webpackLoader(content, map, meta) {
-  const cb = this.async();
-
-  if (!this.resourcePath.endsWith(".ts")) {
-    cb(null, JSON.stringify({ content, imports: [] }), map, meta);
-    return;
-  }
-
-  try {
-    const result = await babel.parseAsync(content, {
-      sourceType: "module",
-      filename: this.resourcePath,
-    });
-
-    const imports = [];
-
-    result.program.body.forEach((node) => {
-      if (node.type === "ImportDeclaration") {
-        const source = node.source.value;
-
-        if (!source.startsWith("langchain")) {
-          return;
-        }
-
-        node.specifiers.forEach((specifier) => {
-          if (specifier.type === "ImportSpecifier") {
-            const local = specifier.local.name;
-            const imported = specifier.imported.name;
-            imports.push({ local, imported, source });
-          } else {
-            throw new Error("Unsupported import type");
-          }
-        });
-      }
-    });
-
-    imports.forEach((imp) => {
-      const { imported, source } = imp;
-      const moduleName = source.split("/").slice(1).join("_");
-      const docsPath = path.resolve(__dirname, "docs", "api", moduleName);
-      const available = fs.readdirSync(docsPath, { withFileTypes: true });
-      const found = available.find(
-        (dirent) =>
-          dirent.isDirectory() &&
-          fs.existsSync(path.resolve(docsPath, dirent.name, imported + ".md"))
-      );
-      if (found) {
-        imp.docs =
-          "/" + path.join("docs", "api", moduleName, found.name, imported);
-      } else {
-        throw new Error(
-          `Could not find docs for ${source}.${imported} in docs/api/`
-        );
-      }
-    });
-
-    cb(null, JSON.stringify({ content, imports }), map, meta);
-  } catch (err) {
-    cb(err);
-  }
-}
-
-module.exports = webpackLoader;
```
(Jupyter notebook cell; file path not preserved in this view)

```diff
@@ -330,7 +330,7 @@
     "id": "da9a9239",
     "metadata": {},
     "source": [
-    "For more information refer to [OpenVINO LLM guide](https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html) and [OpenVINO Local Pipelines notebook](./openvino.ipynb)."
+    "For more information refer to [OpenVINO LLM guide](https://docs.openvino.ai/2024/learn-openvino/llm_inference_guide.html) and [OpenVINO Local Pipelines notebook](/docs/integrations/llms/openvino/)."
    ]
   }
  ],
```
(markdown docs page; file path not preserved in this view)

```diff
@@ -67,7 +67,7 @@ from langchain_community.embeddings import QuantizedBgeEmbeddings

 ### Weight-Only Quantization with ITREX

-See a [usage example](../docs/integrations/llms/weight_only_quantization.ipynb).
+See a [usage example](/docs/integrations/llms/weight_only_quantization).

 ## Detail of Configuration Parameters

```
docs/scripts/copy_templates.py

```diff
@@ -2,35 +2,44 @@ import glob
 import os
 import re
 import shutil
+import sys
 from pathlib import Path

-TEMPLATES_DIR = Path(os.path.abspath(__file__)).parents[2] / "templates"
-DOCS_TEMPLATES_DIR = Path(os.path.abspath(__file__)).parents[1] / "docs" / "templates"
+if __name__ == "__main__":
+    intermediate_dir = Path(sys.argv[1])
+
+    templates_source_dir = Path(os.path.abspath(__file__)).parents[2] / "templates"
+    templates_intermediate_dir = intermediate_dir / "templates"

-readmes = list(glob.glob(str(TEMPLATES_DIR) + "/*/README.md"))
-destinations = [readme[len(str(TEMPLATES_DIR)) + 1 : -10] + ".md" for readme in readmes]
-for source, destination in zip(readmes, destinations):
-    full_destination = DOCS_TEMPLATES_DIR / destination
-    shutil.copyfile(source, full_destination)
-    with open(full_destination, "r") as f:
-        content = f.read()
-    # remove images
-    content = re.sub("\!\[.*?\]\((.*?)\)", "", content)
-    with open(full_destination, "w") as f:
-        f.write(content)
+    readmes = list(glob.glob(str(templates_source_dir) + "/*/README.md"))
+    destinations = [
+        readme[len(str(templates_source_dir)) + 1 : -10] + ".md" for readme in readmes
+    ]
+    for source, destination in zip(readmes, destinations):
+        full_destination = templates_intermediate_dir / destination
+        shutil.copyfile(source, full_destination)
+        with open(full_destination, "r") as f:
+            content = f.read()
+        # remove images
+        content = re.sub("\!\[.*?\]\((.*?)\)", "", content)
+        with open(full_destination, "w") as f:
+            f.write(content)

-sidebar_hidden = """---
+    sidebar_hidden = """---
 sidebar_class_name: hidden
 ---

 """
-TEMPLATES_INDEX_DESTINATION = DOCS_TEMPLATES_DIR / "index.md"
-with open(TEMPLATES_INDEX_DESTINATION, "r") as f:
-    content = f.read()
-
-# replace relative links
-content = re.sub("\]\(\.\.\/", "](/docs/templates/", content)
-
-with open(TEMPLATES_INDEX_DESTINATION, "w") as f:
-    f.write(sidebar_hidden + content)
+    # handle index file
+    templates_index_source = templates_source_dir / "docs" / "INDEX.md"
+    templates_index_intermediate = templates_intermediate_dir / "index.md"
+
+    with open(templates_index_source, "r") as f:
+        content = f.read()
+
+    # replace relative links
+    content = re.sub("\]\(\.\.\/", "](/docs/templates/", content)
+
+    with open(templates_index_intermediate, "w") as f:
+        f.write(sidebar_hidden + content)
```
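After this change the script no longer writes into the source tree; it takes the intermediate dir as its single positional argument. A sketch of the invocation, mirroring how the `generate-files` target calls it:

```bash
# from docs/, after install-py-deps has created .venv
.venv/bin/python scripts/copy_templates.py build/intermediate/docs
# writes build/intermediate/docs/templates/<template>.md plus templates/index.md
```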
docs/scripts/generate_api_reference_links.py

```diff
@@ -25,7 +25,6 @@ _IMPORT_RE = re.compile(
 _CURRENT_PATH = Path(__file__).parent.absolute()
 # Directory where generated markdown files are stored
 _DOCS_DIR = _CURRENT_PATH / "docs"
-_JSON_PATH = _CURRENT_PATH / "api_reference" / "guide_imports.json"


 def find_files(path):
@@ -55,6 +54,12 @@ def get_args():
         default=_DOCS_DIR,
         help="Directory where generated markdown files are stored",
     )
+    parser.add_argument(
+        "--json_path",
+        type=str,
+        default=None,
+        help="Path to store the generated JSON file",
+    )
     return parser.parse_args()


@@ -83,9 +88,11 @@ def main():
             global_imports[class_name][doc_title] = doc_url

     # Write the global imports information to a JSON file
-    _JSON_PATH.parent.mkdir(parents=True, exist_ok=True)
-    with _JSON_PATH.open("w") as f:
-        json.dump(global_imports, f)
+    if args.json_path:
+        json_path = Path(args.json_path)
+        json_path.parent.mkdir(parents=True, exist_ok=True)
+        with json_path.open("w") as f:
+            json.dump(global_imports, f)


 def _get_doc_title(data: str, file_name: str) -> str:
```
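The JSON side effect is now opt-in: with the default `--json_path` of `None` the script only rewrites links, and the guide-imports JSON is emitted only when a path is passed explicitly. For example (output path hypothetical):

```bash
.venv/bin/python scripts/generate_api_reference_links.py \
    --docs_dir build/intermediate/docs \
    --json_path build/output/api_reference/guide_imports.json
```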
docs/scripts/model_feat_table.py

```diff
@@ -1,11 +1,11 @@
 import os
+import sys
 from pathlib import Path

 from langchain_community import chat_models, llms
 from langchain_core.language_models.chat_models import BaseChatModel, SimpleChatModel
 from langchain_core.language_models.llms import LLM, BaseLLM

-INTEGRATIONS_DIR = Path(os.path.abspath(__file__)).parents[1] / "docs" / "integrations"
 LLM_IGNORE = ("FakeListLLM", "OpenAIChat", "PromptLayerOpenAIChat")
 LLM_FEAT_TABLE_CORRECTION = {
     "TextGen": {"_astream": False, "_agenerate": False},
@@ -218,9 +218,17 @@ def get_chat_model_table() -> str:


 if __name__ == "__main__":
+    output_dir = Path(sys.argv[1])
+    output_integrations_dir = output_dir / "integrations"
+    output_integrations_dir_llms = output_integrations_dir / "llms"
+    output_integrations_dir_chat = output_integrations_dir / "chat"
+    output_integrations_dir_llms.mkdir(parents=True, exist_ok=True)
+    output_integrations_dir_chat.mkdir(parents=True, exist_ok=True)
+
     llm_page = LLM_TEMPLATE.format(table=get_llm_table())
-    with open(INTEGRATIONS_DIR / "llms" / "index.mdx", "w") as f:
+
+    with open(output_integrations_dir / "llms" / "index.mdx", "w") as f:
         f.write(llm_page)
     chat_model_page = CHAT_MODEL_TEMPLATE.format(table=get_chat_model_table())
-    with open(INTEGRATIONS_DIR / "chat" / "index.mdx", "w") as f:
+    with open(output_integrations_dir / "chat" / "index.mdx", "w") as f:
         f.write(chat_model_page)
```
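Same pattern here: the output root comes from `sys.argv[1]`, and the target directories are created with `mkdir(parents=True, exist_ok=True)` so the script can run against a fresh intermediate tree. Mirroring the Makefile call:

```bash
.venv/bin/python scripts/model_feat_table.py build/intermediate/docs
# -> build/intermediate/docs/integrations/llms/index.mdx
# -> build/intermediate/docs/integrations/chat/index.mdx
```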
(deleted file — an nbdev-style settings file; path not preserved in this view)

```diff
@@ -1,11 +0,0 @@
-[DEFAULT]
-nbs_path = .
-recursive = True
-tst_flags = notest
-user = hwchase17
-doc_host = https://python.langchain.com
-doc_baseurl = /docs
-module_baseurls = metaflow=https://github.com/Netflix/metaflow/tree/master/
-                  fastcore=https://github.com/fastcore/tree/master
-host = github
-
```
docs/vercel_build.sh

```diff
@@ -2,39 +2,9 @@

 set -e

-yum -y update
-yum install gcc bzip2-devel libffi-devel zlib-devel wget tar gzip -y
+make install-vercel-deps

-# install quarto
-wget -q https://github.com/quarto-dev/quarto-cli/releases/download/v1.3.450/quarto-1.3.450-linux-amd64.tar.gz
-tar -xzf quarto-1.3.450-linux-amd64.tar.gz
-export PATH=$PATH:$(pwd)/quarto-1.3.450/bin/
+QUARTO_CMD="./quarto-1.3.450/bin/quarto" make build

-
-# setup python env
-python3 -m venv .venv
-source .venv/bin/activate
-python3 -m pip install --upgrade pip
-python3 -m pip install --upgrade uv
-python3 -m uv pip install -r vercel_requirements.txt
-python3 -m uv pip install -e $(ls ../libs/partners | grep -vE "airbyte|ibm|.md" | xargs -I {} echo "../libs/partners/{}")
-
-# autogenerate integrations tables
-python3 scripts/model_feat_table.py
-
-# copy in external files
-mkdir docs/templates
-cp ../templates/docs/INDEX.md docs/templates/index.md
-python3 scripts/copy_templates.py
-
-cp ../cookbook/README.md src/pages/cookbook.mdx
-
-wget -q https://raw.githubusercontent.com/langchain-ai/langserve/main/README.md -O docs/langserve.md
-python3 scripts/resolve_local_links.py docs/langserve.md https://github.com/langchain-ai/langserve/tree/main/
-
-wget -q https://raw.githubusercontent.com/langchain-ai/langgraph/main/README.md -O docs/langgraph.md
-python3 scripts/resolve_local_links.py docs/langgraph.md https://github.com/langchain-ai/langgraph/tree/main/
-
-# render
-quarto render docs/
-python3 scripts/generate_api_reference_links.py --docs_dir docs
+
+rm -rf docs
+mv build/output/docs ./
```
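Because the Makefile declares `QUARTO_CMD ?= quarto`, the Vercel script can point the build at the freshly unpacked binary without touching PATH, while local builds fall back to whatever `quarto` resolves to:

```bash
# Vercel: use the binary unpacked by install-vercel-deps
QUARTO_CMD="./quarto-1.3.450/bin/quarto" make build
# local: use quarto from PATH (the ?= default applies)
make build
```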