infra: cleanup docs build (#21134)

Refactors the docs build in order to:
- run the same `make build` command in both vercel and local build
- incrementally build artifacts in 2 distinct steps, instead of building
all docs in-place (in vercel) or in a _dist dir (locally)

Highlights:
- introduces `make build` in order to build the docs
- collects and generates all files for the build in
`docs/build/intermediate`
- renders those jupyter notebook + markdown files into
`docs/build/outputs`

And now the outputs to host are in `docs/build/outputs`, which will need
a vercel settings change.

Todo:
- [ ] figure out how to point to the right output directory (right now
deleting and moving the docs dir in vercel_build.sh isn't great)
This commit is contained in:
Erick Friis
2024-05-01 17:34:05 -07:00
committed by GitHub
parent 6fa8626e2f
commit cd4c54282a
12 changed files with 141 additions and 185 deletions

View File

@@ -2,35 +2,44 @@ import glob
import os
import re
import shutil
import sys
from pathlib import Path
TEMPLATES_DIR = Path(os.path.abspath(__file__)).parents[2] / "templates"
DOCS_TEMPLATES_DIR = Path(os.path.abspath(__file__)).parents[1] / "docs" / "templates"
if __name__ == "__main__":
intermediate_dir = Path(sys.argv[1])
templates_source_dir = Path(os.path.abspath(__file__)).parents[2] / "templates"
templates_intermediate_dir = intermediate_dir / "templates"
readmes = list(glob.glob(str(TEMPLATES_DIR) + "/*/README.md"))
destinations = [readme[len(str(TEMPLATES_DIR)) + 1 : -10] + ".md" for readme in readmes]
for source, destination in zip(readmes, destinations):
full_destination = DOCS_TEMPLATES_DIR / destination
shutil.copyfile(source, full_destination)
with open(full_destination, "r") as f:
content = f.read()
# remove images
content = re.sub("\!\[.*?\]\((.*?)\)", "", content)
with open(full_destination, "w") as f:
f.write(content)
readmes = list(glob.glob(str(templates_source_dir) + "/*/README.md"))
destinations = [
readme[len(str(templates_source_dir)) + 1 : -10] + ".md" for readme in readmes
]
for source, destination in zip(readmes, destinations):
full_destination = templates_intermediate_dir / destination
shutil.copyfile(source, full_destination)
with open(full_destination, "r") as f:
content = f.read()
# remove images
content = re.sub("\!\[.*?\]\((.*?)\)", "", content)
with open(full_destination, "w") as f:
f.write(content)
sidebar_hidden = """---
sidebar_hidden = """---
sidebar_class_name: hidden
---
"""
TEMPLATES_INDEX_DESTINATION = DOCS_TEMPLATES_DIR / "index.md"
with open(TEMPLATES_INDEX_DESTINATION, "r") as f:
content = f.read()
# replace relative links
content = re.sub("\]\(\.\.\/", "](/docs/templates/", content)
# handle index file
templates_index_source = templates_source_dir / "docs" / "INDEX.md"
templates_index_intermediate = templates_intermediate_dir / "index.md"
with open(TEMPLATES_INDEX_DESTINATION, "w") as f:
f.write(sidebar_hidden + content)
with open(templates_index_source, "r") as f:
content = f.read()
# replace relative links
content = re.sub("\]\(\.\.\/", "](/docs/templates/", content)
with open(templates_index_intermediate, "w") as f:
f.write(sidebar_hidden + content)

View File

@@ -25,7 +25,6 @@ _IMPORT_RE = re.compile(
_CURRENT_PATH = Path(__file__).parent.absolute()
# Directory where generated markdown files are stored
_DOCS_DIR = _CURRENT_PATH / "docs"
_JSON_PATH = _CURRENT_PATH / "api_reference" / "guide_imports.json"
def find_files(path):
@@ -55,6 +54,12 @@ def get_args():
default=_DOCS_DIR,
help="Directory where generated markdown files are stored",
)
parser.add_argument(
"--json_path",
type=str,
default=None,
help="Path to store the generated JSON file",
)
return parser.parse_args()
@@ -83,9 +88,11 @@ def main():
global_imports[class_name][doc_title] = doc_url
# Write the global imports information to a JSON file
_JSON_PATH.parent.mkdir(parents=True, exist_ok=True)
with _JSON_PATH.open("w") as f:
json.dump(global_imports, f)
if args.json_path:
json_path = Path(args.json_path)
json_path.parent.mkdir(parents=True, exist_ok=True)
with json_path.open("w") as f:
json.dump(global_imports, f)
def _get_doc_title(data: str, file_name: str) -> str:

View File

@@ -1,11 +1,11 @@
import os
import sys
from pathlib import Path
from langchain_community import chat_models, llms
from langchain_core.language_models.chat_models import BaseChatModel, SimpleChatModel
from langchain_core.language_models.llms import LLM, BaseLLM
INTEGRATIONS_DIR = Path(os.path.abspath(__file__)).parents[1] / "docs" / "integrations"
LLM_IGNORE = ("FakeListLLM", "OpenAIChat", "PromptLayerOpenAIChat")
LLM_FEAT_TABLE_CORRECTION = {
"TextGen": {"_astream": False, "_agenerate": False},
@@ -218,9 +218,17 @@ def get_chat_model_table() -> str:
if __name__ == "__main__":
output_dir = Path(sys.argv[1])
output_integrations_dir = output_dir / "integrations"
output_integrations_dir_llms = output_integrations_dir / "llms"
output_integrations_dir_chat = output_integrations_dir / "chat"
output_integrations_dir_llms.mkdir(parents=True, exist_ok=True)
output_integrations_dir_chat.mkdir(parents=True, exist_ok=True)
llm_page = LLM_TEMPLATE.format(table=get_llm_table())
with open(INTEGRATIONS_DIR / "llms" / "index.mdx", "w") as f:
with open(output_integrations_dir / "llms" / "index.mdx", "w") as f:
f.write(llm_page)
chat_model_page = CHAT_MODEL_TEMPLATE.format(table=get_chat_model_table())
with open(INTEGRATIONS_DIR / "chat" / "index.mdx", "w") as f:
with open(output_integrations_dir / "chat" / "index.mdx", "w") as f:
f.write(chat_model_page)