mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-03 03:59:42 +00:00
Wfh/ref links (#8454)
This commit is contained in:
@@ -5,16 +5,22 @@ import logging
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
import argparse
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
# Base URL for all class documentation
|
||||
_BASE_URL = "https://api.python.langchain.com/en/latest/"
|
||||
_BASE_URL = "https://api.python.langchain.com/en/latest"
|
||||
|
||||
# Regular expression to match Python code blocks
|
||||
code_block_re = re.compile(r"^(```python\n)(.*?)(```\n)", re.DOTALL | re.MULTILINE)
|
||||
# Regular expression to match langchain import lines
|
||||
_IMPORT_RE = re.compile(r"(from\s+(langchain\.\w+(\.\w+)*?)\s+import\s+)(\w+)")
|
||||
_IMPORT_RE = re.compile(
|
||||
r"from\s+(langchain\.\w+(\.\w+)*?)\s+import\s+"
|
||||
r"((?:\w+(?:,\s*)?)*" # Match zero or more words separated by a comma+optional ws
|
||||
r"(?:\s*\(.*?\))?)", # Match optional parentheses block
|
||||
re.DOTALL, # Match newlines as well
|
||||
)
|
||||
|
||||
_CURRENT_PATH = Path(__file__).parent.absolute()
|
||||
# Directory where generated markdown files are stored
|
||||
@@ -24,6 +30,10 @@ _JSON_PATH = _CURRENT_PATH.parent / "api_reference" / "guide_imports.json"
|
||||
|
||||
def find_files(path):
|
||||
"""Find all MDX files in the given path"""
|
||||
# If the path is a single file, yield it directly and stop
|
||||
if os.path.isfile(path):
|
||||
yield path
|
||||
return
|
||||
for root, _, files in os.walk(path):
|
||||
for file in files:
|
||||
if file.endswith(".mdx") or file.endswith(".md"):
|
||||
@@ -37,20 +47,33 @@ def get_full_module_name(module_path, class_name):
|
||||
return inspect.getmodule(class_).__name__
|
||||
|
||||
|
||||
def get_args():
    """Parse the command-line options accepted by this script.

    Returns:
        The ``argparse.Namespace`` produced by ``parse_args()``. Its
        ``docs_dir`` attribute carries the value of ``--docs_dir`` and falls
        back to ``_DOCS_DIR`` (a name defined outside this function) when
        the flag is not supplied.
    """
    # Collect the option settings first so the parser wiring stays one line.
    docs_dir_option = {
        "type": str,
        "default": _DOCS_DIR,
        "help": "Directory where generated markdown files are stored",
    }

    cli = argparse.ArgumentParser()
    cli.add_argument("--docs_dir", **docs_dir_option)

    namespace = cli.parse_args()
    return namespace
|
||||
|
||||
|
||||
def main():
|
||||
"""Main function"""
|
||||
args = get_args()
|
||||
global_imports = {}
|
||||
|
||||
for file in find_files(_DOCS_DIR):
|
||||
for file in find_files(args.docs_dir):
|
||||
print(f"Adding links for imports in {file}")
|
||||
|
||||
# replace_imports now returns the import information rather than writing it to a file
|
||||
file_imports = replace_imports(file)
|
||||
|
||||
if file_imports:
|
||||
# Use relative file path as key
|
||||
relative_path = os.path.relpath(file, _DOCS_DIR)
|
||||
doc_url = f"https://python.langchain.com/docs/{relative_path.replace('.mdx', '').replace('.md', '')}"
|
||||
relative_path = (
|
||||
os.path.relpath(file, _DOCS_DIR).replace(".mdx", "").replace(".md", "")
|
||||
)
|
||||
|
||||
doc_url = f"https://python.langchain.com/docs/{relative_path}"
|
||||
for import_info in file_imports:
|
||||
doc_title = import_info["title"]
|
||||
class_name = import_info["imported"]
|
||||
@@ -77,7 +100,8 @@ def _get_doc_title(data: str, file_name: str) -> str:
|
||||
|
||||
|
||||
def replace_imports(file):
|
||||
"""Replace imports in each Python code block with links to their documentation and append the import info in a comment"""
|
||||
"""Replace imports in each Python code block with links to their
|
||||
documentation and append the import info in a comment"""
|
||||
all_imports = []
|
||||
with open(file, "r") as f:
|
||||
data = f.read()
|
||||
@@ -97,37 +121,45 @@ def replace_imports(file):
|
||||
# Process imports in the code block
|
||||
imports = []
|
||||
for import_match in _IMPORT_RE.finditer(code):
|
||||
class_name = import_match.group(4)
|
||||
try:
|
||||
module_path = get_full_module_name(import_match.group(2), class_name)
|
||||
except AttributeError as e:
|
||||
logger.warning(f"Could not find module for {class_name}, {e}")
|
||||
continue
|
||||
except ImportError as e:
|
||||
# Some CentOS OpenSSL issues can cause this to fail
|
||||
logger.warning(f"Failed to load for class {class_name}, {e}")
|
||||
continue
|
||||
module = import_match.group(1)
|
||||
imports_str = (
|
||||
import_match.group(3).replace("(\n", "").replace("\n)", "")
|
||||
) # Handle newlines within parentheses
|
||||
# remove any newline and spaces, then split by comma
|
||||
imported_classes = [
|
||||
imp.strip()
|
||||
for imp in re.split(r",\s*", imports_str.replace("\n", ""))
|
||||
if imp.strip()
|
||||
]
|
||||
for class_name in imported_classes:
|
||||
try:
|
||||
module_path = get_full_module_name(module, class_name)
|
||||
except AttributeError as e:
|
||||
logger.warning(f"Could not find module for {class_name}, {e}")
|
||||
continue
|
||||
except ImportError as e:
|
||||
logger.warning(f"Failed to load for class {class_name}, {e}")
|
||||
continue
|
||||
|
||||
url = (
|
||||
_BASE_URL
|
||||
+ "/"
|
||||
+ module_path.split(".")[1]
|
||||
+ "/"
|
||||
+ module_path
|
||||
+ "."
|
||||
+ class_name
|
||||
+ ".html"
|
||||
)
|
||||
url = (
|
||||
_BASE_URL
|
||||
+ module_path.split(".")[1]
|
||||
+ "/"
|
||||
+ module_path
|
||||
+ "."
|
||||
+ class_name
|
||||
+ ".html"
|
||||
)
|
||||
|
||||
# Add the import information to our list
|
||||
imports.append(
|
||||
{
|
||||
"imported": class_name,
|
||||
"source": import_match.group(2),
|
||||
"docs": url,
|
||||
"title": _DOC_TITLE,
|
||||
}
|
||||
)
|
||||
# Add the import information to our list
|
||||
imports.append(
|
||||
{
|
||||
"imported": class_name,
|
||||
"source": module,
|
||||
"docs": url,
|
||||
"title": _DOC_TITLE,
|
||||
}
|
||||
)
|
||||
|
||||
if imports:
|
||||
all_imports.extend(imports)
|
||||
|
Reference in New Issue
Block a user