Files
langchain/docs/api_reference/_extensions/docstring_preprocessor.py
2025-09-25 22:29:28 -04:00

186 lines
6.1 KiB
Python

"""Sphinx extension to preprocess docstrings before napoleon conversion.
This extension converts markdown admonitions (``!!! note``) and fenced code
blocks to Sphinx directives in docstrings before the napoleon extension
processes them.
"""
import re
from typing import Any, Dict, List, Optional
from sphinx.application import Sphinx
from sphinx.ext.autodoc import Documenter
# Matches an admonition marker line such as "!!! deprecated 0.2.0" or
# "!!! warning". Group 1 is the leading indent, group 2 the directive name and
# group 3 the optional argument (may carry trailing whitespace, stripped below).
_ADMONITION_RE = re.compile(r"^(\s*)!!!\s+(\w+)(?:\s+(.+))?\s*$")


def convert_markdown_admonitions_in_docstring(docstring: str) -> str:
    """Convert markdown-style admonitions to Sphinx directives.

    A line of the form ``!!! <type> [argument]`` is rewritten to
    ``.. <type>:: [argument]`` with its leading indentation preserved. Every
    other line is passed through unchanged.

    Args:
        docstring: The docstring content to process.

    Returns:
        Docstring with markdown admonitions converted to Sphinx directives.
        The input is returned as-is when it is empty or contains no ``!!!``.
    """
    if not docstring or "!!!" not in docstring:
        return docstring

    result_lines = []
    for line in docstring.split("\n"):
        match = _ADMONITION_RE.match(line)
        if match is None:
            result_lines.append(line)
            continue

        leading_indent, directive_type = match.group(1), match.group(2)
        # The greedy argument group also swallows trailing whitespace (and is
        # whitespace-only for input like "!!! warning   "), so strip it to
        # avoid emitting a directive with a dangling blank argument.
        directive_arg = (match.group(3) or "").strip()

        directive = f"{leading_indent}.. {directive_type}::"
        if directive_arg:
            directive = f"{directive} {directive_arg}"
        result_lines.append(directive)

    return "\n".join(result_lines)
# Opening fence: optional indent, three backticks, then an optional language
# token. The token accepts "+", "#", "." and "-" in addition to word
# characters so that names such as "c++" or "shell-session" are recognised.
_FENCE_OPEN_RE = re.compile(r"^(\s*)```([\w+#.-]*)\s*$")

# Indentation of the code content relative to the ``.. code-block::`` line.
_CODE_BLOCK_INDENT = "    "


def _dedent_code_lines(code_lines: List[str]) -> List[str]:
    """Remove the common leading whitespace; blank lines become ``""``."""
    indents = [len(text) - len(text.lstrip()) for text in code_lines if text.strip()]
    if not indents:
        return ["" for _ in code_lines]
    min_indent = min(indents)
    # Every non-blank line has at least ``min_indent`` leading whitespace
    # characters, so slicing keeps the relative indentation intact.
    return [text[min_indent:] if text.strip() else "" for text in code_lines]


def convert_markdown_codeblocks_in_docstring(docstring: str) -> str:
    """Convert markdown code blocks to Sphinx code-block directives.

    A fenced block opens with a line consisting of optional indentation,
    three backticks and an optional language name, and closes at the first
    later line that is exactly three backticks at the same indentation. The
    block is replaced by a ``.. code-block:: <language>`` directive, an empty
    line, and the dedented code indented four spaces past the directive.
    An opening fence with no matching closing fence is left untouched.

    Args:
        docstring: The docstring content to process.

    Returns:
        Docstring with markdown code blocks converted to Sphinx directives.
        The input is returned as-is when it is empty or contains no fence.
    """
    if not docstring or "```" not in docstring:
        return docstring

    lines = docstring.split("\n")
    result_lines = []
    i = 0
    while i < len(lines):
        line = lines[i]
        opener = _FENCE_OPEN_RE.match(line)

        closing_idx = None
        if opener:
            leading_indent, language = opener.groups()
            # The closing fence must sit at exactly the opener's indentation.
            closing_re = re.compile(rf"^{re.escape(leading_indent)}```\s*$")
            closing_idx = next(
                (j for j in range(i + 1, len(lines)) if closing_re.match(lines[j])),
                None,
            )

        if closing_idx is None:
            # Not a fence, or a fence that is never closed: keep the line.
            result_lines.append(line)
            i += 1
            continue

        directive = f"{leading_indent}.. code-block::"
        if language:
            directive = f"{directive} {language}"
        result_lines.append(directive)
        result_lines.append("")  # Empty line after directive

        for code_line in _dedent_code_lines(lines[i + 1 : closing_idx]):
            if code_line:
                result_lines.append(
                    f"{leading_indent}{_CODE_BLOCK_INDENT}{code_line}"
                )
            else:
                # Blank lines inside the block carry no trailing whitespace.
                result_lines.append("")

        # Resume after the closing fence.
        i = closing_idx + 1

    return "\n".join(result_lines)
def process_docstring(
    app: Sphinx,
    what: str,
    name: str,
    obj: Any,
    options: Dict[str, Any],
    lines: List[str],
) -> None:
    """Rewrite markdown constructs in a docstring as Sphinx directives.

    Handler for Sphinx's ``autodoc-process-docstring`` event. Only ``lines``
    is used: it is joined into one string, admonitions are converted first,
    then fenced code blocks, and the list is updated in place when the text
    changed. The remaining parameters are accepted but not read.

    Args:
        app: The Sphinx application (unused).
        what: Kind of object the docstring belongs to (unused).
        name: Name of the documented object (unused).
        obj: The documented object itself (unused).
        options: Options given to the autodoc directive (unused).
        lines: Docstring lines; modified in place.
    """
    if lines:
        original = "\n".join(lines)
        # Admonitions are handled before code fences.
        rewritten = convert_markdown_codeblocks_in_docstring(
            convert_markdown_admonitions_in_docstring(original)
        )
        if rewritten != original:
            # Mutate the caller's list rather than rebinding the name.
            lines[:] = rewritten.split("\n")
def setup(app: Sphinx) -> Dict[str, Any]:
    """Set up the Sphinx extension.

    Connects ``process_docstring`` to the ``autodoc-process-docstring`` event
    and returns the extension metadata.

    Args:
        app: The Sphinx application to register the handler on.

    Returns:
        Extension metadata: the version and flags declaring the extension
        safe for parallel reading and writing.
    """
    # Sphinx calls listeners in ascending priority order and uses 500 when no
    # priority is given. Registering at 500 would only tie with a
    # default-priority listener such as napoleon's, leaving the order to
    # extension load order; a lower value runs this handler first.
    app.connect("autodoc-process-docstring", process_docstring, priority=400)
    return {
        "version": "1.0.0",
        "parallel_read_safe": True,
        "parallel_write_safe": True,
    }