mirror of
				https://github.com/hwchase17/langchain.git
				synced 2025-10-31 16:08:59 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			327 lines
		
	
	
		
			9.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			327 lines
		
	
	
		
			9.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| """Script for auto-generating api_reference.rst."""
 | |
| import importlib
 | |
| import inspect
 | |
| import typing
 | |
| from enum import Enum
 | |
| from pathlib import Path
 | |
| from typing import Dict, List, Literal, Optional, Sequence, TypedDict, Union
 | |
| 
 | |
| from pydantic import BaseModel
 | |
| 
 | |
| ROOT_DIR = Path(__file__).parents[2].absolute()
 | |
| HERE = Path(__file__).parent
 | |
| 
 | |
| PKG_DIR = ROOT_DIR / "libs" / "langchain" / "langchain"
 | |
| EXP_DIR = ROOT_DIR / "libs" / "experimental" / "langchain_experimental"
 | |
| WRITE_FILE = HERE / "api_reference.rst"
 | |
| EXP_WRITE_FILE = HERE / "experimental_api_reference.rst"
 | |
| 
 | |
| 
 | |
| ClassKind = Literal["TypedDict", "Regular", "Pydantic", "enum"]
 | |
| 
 | |
| 
 | |
| class ClassInfo(TypedDict):
 | |
|     """Information about a class."""
 | |
| 
 | |
|     name: str
 | |
|     """The name of the class."""
 | |
|     qualified_name: str
 | |
|     """The fully qualified name of the class."""
 | |
|     kind: ClassKind
 | |
|     """The kind of the class."""
 | |
|     is_public: bool
 | |
|     """Whether the class is public or not."""
 | |
| 
 | |
| 
 | |
| class FunctionInfo(TypedDict):
 | |
|     """Information about a function."""
 | |
| 
 | |
|     name: str
 | |
|     """The name of the function."""
 | |
|     qualified_name: str
 | |
|     """The fully qualified name of the function."""
 | |
|     is_public: bool
 | |
|     """Whether the function is public or not."""
 | |
| 
 | |
| 
 | |
| class ModuleMembers(TypedDict):
 | |
|     """A dictionary of module members."""
 | |
| 
 | |
|     classes_: Sequence[ClassInfo]
 | |
|     functions: Sequence[FunctionInfo]
 | |
| 
 | |
| 
 | |
| def _load_module_members(module_path: str, namespace: str) -> ModuleMembers:
 | |
|     """Load all members of a module.
 | |
| 
 | |
|     Args:
 | |
|         module_path: Path to the module.
 | |
|         namespace: the namespace of the module.
 | |
| 
 | |
|     Returns:
 | |
|         list: A list of loaded module objects.
 | |
|     """
 | |
|     classes_: List[ClassInfo] = []
 | |
|     functions: List[FunctionInfo] = []
 | |
|     module = importlib.import_module(module_path)
 | |
|     for name, type_ in inspect.getmembers(module):
 | |
|         if not hasattr(type_, "__module__"):
 | |
|             continue
 | |
|         if type_.__module__ != module_path:
 | |
|             continue
 | |
| 
 | |
|         if inspect.isclass(type_):
 | |
|             if type(type_) == typing._TypedDictMeta:  # type: ignore
 | |
|                 kind: ClassKind = "TypedDict"
 | |
|             elif issubclass(type_, Enum):
 | |
|                 kind = "enum"
 | |
|             elif issubclass(type_, BaseModel):
 | |
|                 kind = "Pydantic"
 | |
|             else:
 | |
|                 kind = "Regular"
 | |
| 
 | |
|             classes_.append(
 | |
|                 ClassInfo(
 | |
|                     name=name,
 | |
|                     qualified_name=f"{namespace}.{name}",
 | |
|                     kind=kind,
 | |
|                     is_public=not name.startswith("_"),
 | |
|                 )
 | |
|             )
 | |
|         elif inspect.isfunction(type_):
 | |
|             functions.append(
 | |
|                 FunctionInfo(
 | |
|                     name=name,
 | |
|                     qualified_name=f"{namespace}.{name}",
 | |
|                     is_public=not name.startswith("_"),
 | |
|                 )
 | |
|             )
 | |
|         else:
 | |
|             continue
 | |
| 
 | |
|     return ModuleMembers(
 | |
|         classes_=classes_,
 | |
|         functions=functions,
 | |
|     )
 | |
| 
 | |
| 
 | |
| def _merge_module_members(
 | |
|     module_members: Sequence[ModuleMembers],
 | |
| ) -> ModuleMembers:
 | |
|     """Merge module members."""
 | |
|     classes_: List[ClassInfo] = []
 | |
|     functions: List[FunctionInfo] = []
 | |
|     for module in module_members:
 | |
|         classes_.extend(module["classes_"])
 | |
|         functions.extend(module["functions"])
 | |
| 
 | |
|     return ModuleMembers(
 | |
|         classes_=classes_,
 | |
|         functions=functions,
 | |
|     )
 | |
| 
 | |
| 
 | |
| def _load_package_modules(
 | |
|     package_directory: Union[str, Path], submodule: Optional[str] = None
 | |
| ) -> Dict[str, ModuleMembers]:
 | |
|     """Recursively load modules of a package based on the file system.
 | |
| 
 | |
|     Traversal based on the file system makes it easy to determine which
 | |
|     of the modules/packages are part of the package vs. 3rd party or built-in.
 | |
| 
 | |
|     Parameters:
 | |
|         package_directory: Path to the package directory.
 | |
|         submodule: Optional name of submodule to load.
 | |
| 
 | |
|     Returns:
 | |
|         list: A list of loaded module objects.
 | |
|     """
 | |
|     package_path = (
 | |
|         Path(package_directory)
 | |
|         if isinstance(package_directory, str)
 | |
|         else package_directory
 | |
|     )
 | |
|     modules_by_namespace = {}
 | |
| 
 | |
|     # Get the high level package name
 | |
|     package_name = package_path.name
 | |
| 
 | |
|     # If we are loading a submodule, add it in
 | |
|     if submodule is not None:
 | |
|         package_path = package_path / submodule
 | |
| 
 | |
|     for file_path in package_path.rglob("*.py"):
 | |
|         if file_path.name.startswith("_"):
 | |
|             continue
 | |
| 
 | |
|         relative_module_name = file_path.relative_to(package_path)
 | |
| 
 | |
|         # Skip if any module part starts with an underscore
 | |
|         if any(part.startswith("_") for part in relative_module_name.parts):
 | |
|             continue
 | |
| 
 | |
|         # Get the full namespace of the module
 | |
|         namespace = str(relative_module_name).replace(".py", "").replace("/", ".")
 | |
|         # Keep only the top level namespace
 | |
|         top_namespace = namespace.split(".")[0]
 | |
| 
 | |
|         try:
 | |
|             # If submodule is present, we need to construct the paths in a slightly
 | |
|             # different way
 | |
|             if submodule is not None:
 | |
|                 module_members = _load_module_members(
 | |
|                     f"{package_name}.{submodule}.{namespace}",
 | |
|                     f"{submodule}.{namespace}",
 | |
|                 )
 | |
|             else:
 | |
|                 module_members = _load_module_members(
 | |
|                     f"{package_name}.{namespace}", namespace
 | |
|                 )
 | |
|             # Merge module members if the namespace already exists
 | |
|             if top_namespace in modules_by_namespace:
 | |
|                 existing_module_members = modules_by_namespace[top_namespace]
 | |
|                 _module_members = _merge_module_members(
 | |
|                     [existing_module_members, module_members]
 | |
|                 )
 | |
|             else:
 | |
|                 _module_members = module_members
 | |
| 
 | |
|             modules_by_namespace[top_namespace] = _module_members
 | |
| 
 | |
|         except ImportError as e:
 | |
|             print(f"Error: Unable to import module '{namespace}' with error: {e}")
 | |
| 
 | |
|     return modules_by_namespace
 | |
| 
 | |
| 
 | |
| def _construct_doc(pkg: str, members_by_namespace: Dict[str, ModuleMembers]) -> str:
 | |
|     """Construct the contents of the reference.rst file for the given package.
 | |
| 
 | |
|     Args:
 | |
|         pkg: The package name
 | |
|         members_by_namespace: The members of the package, dict organized by top level
 | |
|                               module contains a list of classes and functions
 | |
|                               inside of the top level namespace.
 | |
| 
 | |
|     Returns:
 | |
|         The contents of the reference.rst file.
 | |
|     """
 | |
|     full_doc = f"""\
 | |
| =======================
 | |
| ``{pkg}`` API Reference
 | |
| =======================
 | |
| 
 | |
| """
 | |
|     namespaces = sorted(members_by_namespace)
 | |
| 
 | |
|     for module in namespaces:
 | |
|         _members = members_by_namespace[module]
 | |
|         classes = _members["classes_"]
 | |
|         functions = _members["functions"]
 | |
|         if not (classes or functions):
 | |
|             continue
 | |
|         section = f":mod:`{pkg}.{module}`"
 | |
|         underline = "=" * (len(section) + 1)
 | |
|         full_doc += f"""\
 | |
| {section}
 | |
| {underline}
 | |
| 
 | |
| .. automodule:: {pkg}.{module}
 | |
|     :no-members:
 | |
|     :no-inherited-members:
 | |
| 
 | |
| """
 | |
| 
 | |
|         if classes:
 | |
|             full_doc += f"""\
 | |
| Classes
 | |
| --------------
 | |
| .. currentmodule:: {pkg}
 | |
| 
 | |
| .. autosummary::
 | |
|     :toctree: {module}
 | |
| """
 | |
| 
 | |
|             for class_ in sorted(classes, key=lambda c: c["qualified_name"]):
 | |
|                 if not class_["is_public"]:
 | |
|                     continue
 | |
| 
 | |
|                 if class_["kind"] == "TypedDict":
 | |
|                     template = "typeddict.rst"
 | |
|                 elif class_["kind"] == "enum":
 | |
|                     template = "enum.rst"
 | |
|                 elif class_["kind"] == "Pydantic":
 | |
|                     template = "pydantic.rst"
 | |
|                 else:
 | |
|                     template = "class.rst"
 | |
| 
 | |
|                 full_doc += f"""\
 | |
|     :template: {template}
 | |
|     
 | |
|     {class_["qualified_name"]}
 | |
|     
 | |
| """
 | |
| 
 | |
|         if functions:
 | |
|             _functions = [f["qualified_name"] for f in functions if f["is_public"]]
 | |
|             fstring = "\n    ".join(sorted(_functions))
 | |
|             full_doc += f"""\
 | |
| Functions
 | |
| --------------
 | |
| .. currentmodule:: {pkg}
 | |
| 
 | |
| .. autosummary::
 | |
|     :toctree: {module}
 | |
|     :template: function.rst
 | |
| 
 | |
|     {fstring}
 | |
| 
 | |
| """
 | |
|     return full_doc
 | |
| 
 | |
| 
 | |
| def _document_langchain_experimental() -> None:
 | |
|     """Document the langchain_experimental package."""
 | |
|     # Generate experimental_api_reference.rst
 | |
|     exp_members = _load_package_modules(EXP_DIR)
 | |
|     exp_doc = ".. _experimental_api_reference:\n\n" + _construct_doc(
 | |
|         "langchain_experimental", exp_members
 | |
|     )
 | |
|     with open(EXP_WRITE_FILE, "w") as f:
 | |
|         f.write(exp_doc)
 | |
| 
 | |
| 
 | |
| def _document_langchain_core() -> None:
 | |
|     """Document the main langchain package."""
 | |
|     # load top level module members
 | |
|     lc_members = _load_package_modules(PKG_DIR)
 | |
| 
 | |
|     # Add additional packages
 | |
|     tools = _load_package_modules(PKG_DIR, "tools")
 | |
|     agents = _load_package_modules(PKG_DIR, "agents")
 | |
|     schema = _load_package_modules(PKG_DIR, "schema")
 | |
| 
 | |
|     lc_members.update(
 | |
|         {
 | |
|             "agents.output_parsers": agents["output_parsers"],
 | |
|             "agents.format_scratchpad": agents["format_scratchpad"],
 | |
|             "tools.render": tools["render"],
 | |
|             "schema.runnable": schema["runnable"],
 | |
|         }
 | |
|     )
 | |
| 
 | |
|     lc_doc = ".. _api_reference:\n\n" + _construct_doc("langchain", lc_members)
 | |
| 
 | |
|     with open(WRITE_FILE, "w") as f:
 | |
|         f.write(lc_doc)
 | |
| 
 | |
| 
 | |
| def main() -> None:
 | |
|     """Generate the reference.rst file for each package."""
 | |
|     _document_langchain_core()
 | |
|     _document_langchain_experimental()
 | |
| 
 | |
| 
 | |
| if __name__ == "__main__":
 | |
|     main()
 |