diff --git a/docs/Makefile b/docs/Makefile index e33a6654f5b..66d59a46e2c 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -38,6 +38,8 @@ generate-files: $(PYTHON) scripts/model_feat_table.py $(INTERMEDIATE_DIR) + $(PYTHON) scripts/document_loader_feat_table.py $(INTERMEDIATE_DIR) + $(PYTHON) scripts/copy_templates.py $(INTERMEDIATE_DIR) wget -q https://raw.githubusercontent.com/langchain-ai/langserve/main/README.md -O $(INTERMEDIATE_DIR)/langserve.md diff --git a/docs/scripts/document_loader_feat_table.py b/docs/scripts/document_loader_feat_table.py new file mode 100644 index 00000000000..3e2fe65e503 --- /dev/null +++ b/docs/scripts/document_loader_feat_table.py @@ -0,0 +1,77 @@ +import sys +from pathlib import Path + +from langchain_community import document_loaders +from langchain_core.document_loaders.base import BaseLoader + +DOCUMENT_LOADER_TEMPLATE = """\ +--- +sidebar_position: 0 +sidebar_class_name: hidden +keywords: [compatibility] +custom_edit_url: +hide_table_of_contents: true +--- + +# Document loaders + +## Features + +The following table shows the feature support for all document loaders. + +{table} + +""" + + +def get_document_loader_table() -> str: + """Get the table of document loaders.""" + + doc_loaders_feat_table = {} + for cm in document_loaders.__all__: + doc_loaders_feat_table[cm] = {} + cls = getattr(document_loaders, cm) + if issubclass(cls, BaseLoader): + for feat in ("aload", "alazy_load", ("lazy_load", "lazy_loading")): + if isinstance(feat, tuple): + feat, name = feat + else: + feat, name = feat, feat + doc_loaders_feat_table[cm][name] = getattr(cls, feat) != getattr( + BaseLoader, feat + ) + native_async = ( + doc_loaders_feat_table[cm]["aload"] + or doc_loaders_feat_table[cm]["alazy_load"] + ) + del doc_loaders_feat_table[cm]["aload"] + del doc_loaders_feat_table[cm]["alazy_load"] + doc_loaders_feat_table[cm]["native_async"] = native_async + doc_loaders_feat_table[cm]["description"] = (cls.__doc__ or "").split("\n")[ + 0 + ] + + header = ["loader", "description", "lazy_loading", "native_async"] + title = ["Document Loader", "Description", "Lazy loading", "Native async support"] + rows = [title, [":-"] + [":-:"] * (len(title) - 1)] + for loader, feats in sorted(doc_loaders_feat_table.items()): + if not feats: + continue + rows += [ + [loader, feats["description"]] + + ["✅" if feats.get(h) else "❌" for h in header[2:]] + ] + return "\n".join(["|".join(row) for row in rows]) + + +if __name__ == "__main__": + output_dir = Path(sys.argv[1]) + output_integrations_dir = output_dir / "integrations" + output_integrations_dir_doc_loaders = output_integrations_dir / "document_loaders" + output_integrations_dir_doc_loaders.mkdir(parents=True, exist_ok=True) + + document_loader_page = DOCUMENT_LOADER_TEMPLATE.format( + table=get_document_loader_table() + ) + with open(output_integrations_dir / "document_loaders" / "index.mdx", "w") as f: + f.write(document_loader_page) diff --git a/docs/sidebars.js b/docs/sidebars.js index 5311f181920..5a7b2c56548 100644 --- a/docs/sidebars.js +++ b/docs/sidebars.js @@ -184,8 +184,8 @@ module.exports = { }, ], link: { - type: "generated-index", - slug: "integrations/document_loaders", + type: "doc", + id: "integrations/document_loaders/index", }, }, {