mv docs extras (#11399)

2025-09-04 20:46:45 +00:00 · 2023-10-06 10:09:41 -07:00
parent 53887242a1
commit 88ab69c288
936 changed files with 522 additions and 1582 deletions
--- a/docs/scripts/generate_api_reference_links.py
+++ b/docs/scripts/generate_api_reference_links.py
@@ -0,0 +1,183 @@
+import importlib
+import inspect
+import json
+import logging
+import os
+import re
+from pathlib import Path
+import argparse
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Prefix of every generated API reference URL
+_BASE_URL = "https://api.python.langchain.com/en/latest/"
+
+# Fenced python block: group 1 = opening fence, 2 = code body, 3 = closing fence
+code_block_re = re.compile(r"^(```python\n)(.*?)(```\n)", re.DOTALL | re.MULTILINE)
+# `from langchain.<module> import <names>`: group 1 = module, group 3 = names
+_IMPORT_RE = re.compile(
+    r"from\s+(langchain\.\w+(\.\w+)*?)\s+import\s+"
+    r"((?:\w+(?:,\s*)?)*"  # Zero or more names, each optionally followed by ", "
+    r"(?:\s*\(.*?\))?)",  # Optional parenthesised (possibly multi-line) name list
+    re.DOTALL,  # Let "." match newlines as well
+)
+
+_CURRENT_PATH = Path(__file__).parent.absolute()
+# Default for --docs_dir; main() also builds doc URLs relative to this directory
+_DOCS_DIR = _CURRENT_PATH / "docs_skeleton" / "docs"
+_JSON_PATH = _CURRENT_PATH / "api_reference" / "guide_imports.json"
+
+
+def find_files(path):
+    """Yield `path` itself if it is a file, else every .md/.mdx file under it."""
+    if not os.path.isfile(path):
+        # Directory (or missing path): walk the whole tree.
+        for dirpath, _dirnames, filenames in os.walk(path):
+            matches = (n for n in filenames if n.endswith((".mdx", ".md")))
+            for name in matches:
+                yield os.path.join(dirpath, name)
+        return
+    yield path
+
+
+def get_full_module_name(module_path, class_name):
+    """Return the name of the module that `inspect` reports for `class_name`."""
+    # Import `module_path`, fetch the attribute, then ask where that object lives.
+    target = getattr(importlib.import_module(module_path), class_name)
+    return inspect.getmodule(target).__name__
+
+
+def get_args():
+    """Parse command-line options; `--docs_dir` defaults to `_DOCS_DIR`."""
+    docs_dir_help = "Directory where generated markdown files are stored"
+    cli = argparse.ArgumentParser()
+    # Only option: the root folder whose markdown files get processed.
+    cli.add_argument(
+        "--docs_dir", type=str, default=_DOCS_DIR, help=docs_dir_help
+    )
+    return cli.parse_args()
+
+
+def main():
+    """Inject API links into every doc file and dump a class -> guides index.
+
+    Every file from find_files(args.docs_dir) is rewritten by replace_imports();
+    each reported import maps its name to {page title: page URL} in _JSON_PATH.
+    """
+    args = get_args()
+    global_imports = {}
+    for file in find_files(args.docs_dir):
+        print(f"Adding links for imports in {file}")
+        file_imports = replace_imports(file)
+        if not file_imports:
+            continue
+        # URL slug: path relative to the docs root, "/"-separated, with only a
+        # trailing .mdx/.md removed (str.replace would also hit ".md" mid-path).
+        relative_path = os.path.relpath(file, _DOCS_DIR).replace(os.sep, "/")
+        for suffix in (".mdx", ".md"):
+            if relative_path.endswith(suffix):
+                relative_path = relative_path[: -len(suffix)]
+                break
+        doc_url = f"https://python.langchain.com/docs/{relative_path}"
+        for import_info in file_imports:
+            titles = global_imports.setdefault(import_info["imported"], {})
+            titles[import_info["title"]] = doc_url
+    _JSON_PATH.parent.mkdir(parents=True, exist_ok=True)
+    with _JSON_PATH.open("w") as f:
+        json.dump(global_imports, f)
+
+
+def _get_doc_title(data: str, file_name: str) -> str:
+    """Return the first title found in `data`, falling back to `file_name`."""
+    # Tried in order: markdown "# Title", then an rst title underlined by "=".
+    title_patterns = (r"^#\s+(.*)", r"^(.*)\n=+\n")
+    for pattern in title_patterns:
+        found = re.search(pattern, data, re.MULTILINE)
+        if found is not None:
+            return found.group(1)
+    # Neither heading style is present.
+    return file_name
+
+
+def replace_imports(file):
+    """Annotate the Python code blocks of `file` with API reference links.
+
+    Every fenced python block is scanned for `from langchain... import ...`
+    lines. Each imported name that can be resolved gets an API reference URL,
+    and the block is rewritten in place with a leading `<!--IMPORTS:[...]-->`
+    comment holding that metadata; a comment left by a previous run is dropped.
+
+    Returns the list of import records ("imported", "source", "docs", "title")
+    collected across all blocks of the file.
+    """
+    all_imports = []
+    # Docs contain non-ASCII text; do not depend on the platform's locale.
+    with open(file, "r", encoding="utf-8") as f:
+        data = f.read()
+
+    file_name = os.path.basename(file)
+    _DOC_TITLE = _get_doc_title(data, file_name)
+    # TODO: Use our own custom <code> component rather than this
+    # injection method
+    existing_comment_re = re.compile(r"^<!--IMPORTS:.*?-->\n", re.MULTILINE)
+
+    def replacer(match):
+        # Body of the code block, minus any import comment from an earlier run
+        code = existing_comment_re.sub("", match.group(2))
+
+        # Process imports in the code block
+        imports = []
+        for import_match in _IMPORT_RE.finditer(code):
+            module = import_match.group(1)
+            imports_str = (
+                import_match.group(3).replace("(\n", "").replace("\n)", "")
+            )  # Handle newlines within parentheses
+            # remove any newline and spaces, then split by comma
+            imported_classes = [
+                imp.strip()
+                for imp in re.split(r",\s*", imports_str.replace("\n", ""))
+                if imp.strip()
+            ]
+            for class_name in imported_classes:
+                try:
+                    module_path = get_full_module_name(module, class_name)
+                except AttributeError as e:
+                    logger.warning(f"Could not find module for {class_name}, {e}")
+                    continue
+                except ImportError as e:
+                    logger.warning(f"Failed to load for class {class_name}, {e}")
+                    continue
+                # The URL needs the sub-package (2nd path component); a name
+                # defined in a top-level module such as `typing` has none.
+                package = module_path.split(".")[1:2]
+                if not package:
+                    continue
+                url = f"{_BASE_URL}{package[0]}/{module_path}.{class_name}.html"
+                # Add the import information to our list
+                imports.append(
+                    {
+                        "imported": class_name,
+                        "source": module,
+                        "docs": url,
+                        "title": _DOC_TITLE,
+                    }
+                )
+
+        if not imports:
+            # If there are no imports, return the original match
+            return match.group(0)
+        all_imports.extend(imports)
+        # Inject the import metadata as a comment at the start of the block
+        import_comment = f"<!--IMPORTS:{json.dumps(imports)}-->"
+        return match.group(1) + import_comment + "\n" + code + match.group(3)
+
+    # Use re.sub to replace each Python code block
+    data = code_block_re.sub(replacer, data)
+
+    with open(file, "w", encoding="utf-8") as f:
+        f.write(data)
+    return all_imports
+
+
+if __name__ == "__main__":
+    main()  # Runs only when executed as a script, not when imported
--- a/docs/scripts/model_feat_table.py
+++ b/docs/scripts/model_feat_table.py
@@ -0,0 +1,161 @@
+import os
+from pathlib import Path
+
+from langchain import chat_models, llms
+from langchain.chat_models.base import BaseChatModel, SimpleChatModel
+from langchain.llms.base import BaseLLM, LLM
+
+INTEGRATIONS_DIR = (  # Root of the generated llms/ and chat/ index.mdx pages
+    Path(os.path.abspath(__file__)).parents[1]
+    / "docs_skeleton"
+    / "docs"
+    / "integrations"
+)
+LLM_IGNORE = ("FakeListLLM", "OpenAIChat", "PromptLayerOpenAIChat")  # Left out of the LLM table
+LLM_FEAT_TABLE_CORRECTION = {  # Each entry replaces that model's detected features as a whole
+    "TextGen": {"_astream": False, "_agenerate": False},
+    "Ollama": {
+        "_stream": False,
+    },
+    "PromptLayerOpenAI": {"batch_generate": False, "batch_agenerate": False},
+}
+CHAT_MODEL_IGNORE = ("FakeListChatModel", "HumanInputChatModel")  # Left out of the chat table
+CHAT_MODEL_FEAT_TABLE_CORRECTION = {  # Each entry replaces that model's detected features
+    "ChatMLflowAIGateway": {"_agenerate": False},
+    "PromptLayerChatOpenAI": {"_stream": False, "_astream": False},
+    "ChatKonko": {"_astream": False, "_agenerate": False},
+}
+
+LLM_TEMPLATE = """\
+---
+sidebar_position: 0
+sidebar_class_name: hidden
+---
+
+# LLMs
+
+import DocCardList from "@theme/DocCardList";
+
+## Features (natively supported)
+All LLMs implement the Runnable interface, which comes with default implementations of all methods, ie. `ainvoke`, `batch`, `abatch`, `stream`, `astream`. This gives all LLMs basic support for async, streaming and batch, which by default is implemented as below:
+- *Async* support defaults to calling the respective sync method in asyncio's default thread pool executor. This lets other async functions in your application make progress while the LLM is being executed, by moving this call to a background thread.
+- *Streaming* support defaults to returning an `Iterator` (or `AsyncIterator` in the case of async streaming) of a single value, the final result returned by the underlying LLM provider. This obviously doesn't give you token-by-token streaming, which requires native support from the LLM provider, but ensures your code that expects an iterator of tokens can work for any of our LLM integrations.
+- *Batch* support defaults to calling the underlying LLM in parallel for each input by making use of a thread pool executor (in the sync batch case) or `asyncio.gather` (in the async batch case). The concurrency can be controlled with the `max_concurrency` key in `RunnableConfig`.
+
+Each LLM integration can optionally provide native implementations for async, streaming or batch, which, for providers that support it, can be more efficient. The table shows, for each integration, which features have been implemented with native support.
+
+{table}
+
+<DocCardList />
+"""  # Page body for llms/index.mdx; {table} is filled through str.format
+
+CHAT_MODEL_TEMPLATE = """\
+---
+sidebar_position: 1
+sidebar_class_name: hidden
+---
+
+# Chat models
+
+import DocCardList from "@theme/DocCardList";
+
+## Features (natively supported)
+All ChatModels implement the Runnable interface, which comes with default implementations of all methods, ie. `ainvoke`, `batch`, `abatch`, `stream`, `astream`. This gives all ChatModels basic support for async, streaming and batch, which by default is implemented as below:
+- *Async* support defaults to calling the respective sync method in asyncio's default thread pool executor. This lets other async functions in your application make progress while the ChatModel is being executed, by moving this call to a background thread.
+- *Streaming* support defaults to returning an `Iterator` (or `AsyncIterator` in the case of async streaming) of a single value, the final result returned by the underlying ChatModel provider. This obviously doesn't give you token-by-token streaming, which requires native support from the ChatModel provider, but ensures your code that expects an iterator of tokens can work for any of our ChatModel integrations.
+- *Batch* support defaults to calling the underlying ChatModel in parallel for each input by making use of a thread pool executor (in the sync batch case) or `asyncio.gather` (in the async batch case). The concurrency can be controlled with the `max_concurrency` key in `RunnableConfig`.
+
+Each ChatModel integration can optionally provide native implementations to truly enable async or streaming.
+The table shows, for each integration, which features have been implemented with native support.
+
+{table}
+
+<DocCardList />
+"""  # Page body for chat/index.mdx; {table} is filled through str.format
+
+
+def get_llm_table():
+    """Build the markdown table of natively supported features per LLM.
+
+    A feature is flagged when the class's method differs from the one on its
+    reference base class (LLM for its subclasses, BaseLLM for the rest).
+    Entries in LLM_FEAT_TABLE_CORRECTION replace a model's detected features
+    wholesale, and names in LLM_IGNORE are dropped from the table.
+    """
+    # (method to inspect, column it feeds) for each kind of base class
+    llm_checks = (
+        ("_stream", "_stream"),
+        ("_astream", "_astream"),
+        ("_acall", "_agenerate"),
+    )
+    base_llm_checks = (
+        ("_stream", "_stream"),
+        ("_astream", "_astream"),
+        ("_generate", "batch_generate"),
+        ("_agenerate", "_agenerate"),
+        ("_agenerate", "batch_agenerate"),
+    )
+    detected = {}
+    for model_name in llms.__all__:
+        model_cls = getattr(llms, model_name)
+        if issubclass(model_cls, LLM):
+            base, checks = LLM, llm_checks
+        else:
+            base, checks = BaseLLM, base_llm_checks
+        detected[model_name] = {
+            column: getattr(model_cls, method) != getattr(base, method)
+            for method, column in checks
+        }
+    merged = {**detected, **LLM_FEAT_TABLE_CORRECTION}
+    final_feats = {k: v for k, v in merged.items() if k not in LLM_IGNORE}
+    columns = ("_agenerate", "_stream", "_astream", "batch_generate", "batch_agenerate")
+    title = [
+        "Model",
+        "Invoke",
+        "Async invoke",
+        "Stream",
+        "Async stream",
+        "Batch",
+        "Async batch",
+    ]
+    # Title row, then the alignment row: left for names, centered for flags
+    lines = ["|".join(title), "|".join([":-"] + [":-:"] * (len(title) - 1))]
+    for model_name in sorted(final_feats):
+        feats = final_feats[model_name]
+        # The "Invoke" column is unconditionally ✅; the rest come from feats
+        marks = ["✅" if feats.get(column) else "❌" for column in columns]
+        lines.append("|".join([model_name, "✅"] + marks))
+    return "\n".join(lines)
+
+
+def get_chat_model_table():
+    """Build the markdown table of natively supported chat model features.
+
+    A feature is flagged when the class's method differs from the one on
+    SimpleChatModel (for its subclasses) or BaseChatModel (for the rest).
+    """
+    detected = {}
+    for model_name in chat_models.__all__:
+        impl = getattr(chat_models, model_name)
+        base = SimpleChatModel if issubclass(impl, SimpleChatModel) else BaseChatModel
+        detected[model_name] = {
+            method: getattr(impl, method) != getattr(base, method)
+            for method in ("_stream", "_astream", "_agenerate")
+        }
+    merged = {**detected, **CHAT_MODEL_FEAT_TABLE_CORRECTION}
+    columns = ("_agenerate", "_stream", "_astream")
+    title = ["Model", "Invoke", "Async invoke", "Stream", "Async stream"]
+    lines = ["|".join(title), "|".join([":-"] + [":-:"] * (len(title) - 1))]
+    for name in sorted(k for k in merged if k not in CHAT_MODEL_IGNORE):
+        marks = ["✅" if merged[name].get(c) else "❌" for c in columns]
+        lines.append("|".join([name, "✅"] + marks))
+    return "\n".join(lines)
+
+
+if __name__ == "__main__":
+    # Pages contain ✅/❌: write UTF-8 explicitly rather than the locale encoding.
+    llm_page = LLM_TEMPLATE.format(table=get_llm_table())
+    (INTEGRATIONS_DIR / "llms" / "index.mdx").write_text(llm_page, encoding="utf-8")
+    chat_model_page = CHAT_MODEL_TEMPLATE.format(table=get_chat_model_table())
+    chat_index = INTEGRATIONS_DIR / "chat" / "index.mdx"
+    chat_index.write_text(chat_model_page, encoding="utf-8")