William FH
2023-07-26 12:38:58 -07:00
committed by GitHub
parent a612800ef0
commit 01a9b06400
16 changed files with 263 additions and 28 deletions

View File

@@ -0,0 +1,150 @@
import importlib
import inspect
import json
import logging
import os
import re
from pathlib import Path
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Base URL for all class documentation
_BASE_URL = "https://api.python.langchain.com/en/latest/"
# Regular expression to match Python code blocks
code_block_re = re.compile(r"^(```python\n)(.*?)(```\n)", re.DOTALL | re.MULTILINE)
# Regular expression to match langchain import lines
_IMPORT_RE = re.compile(r"(from\s+(langchain\.\w+(\.\w+)*?)\s+import\s+)(\w+)")
_CURRENT_PATH = Path(__file__).parent.absolute()
# Directory where generated markdown files are stored
_DOCS_DIR = _CURRENT_PATH / "docs"
_JSON_PATH = _CURRENT_PATH.parent / "api_reference" / "guide_imports.json"
def find_files(path):
"""Find all MDX files in the given path"""
for root, _, files in os.walk(path):
for file in files:
if file.endswith(".mdx") or file.endswith(".md"):
yield os.path.join(root, file)
def get_full_module_name(module_path, class_name):
"""Get full module name using inspect"""
module = importlib.import_module(module_path)
class_ = getattr(module, class_name)
return inspect.getmodule(class_).__name__
def main():
"""Main function"""
global_imports = {}
for file in find_files(_DOCS_DIR):
print(f"Adding links for imports in {file}")
# replace_imports now returns the import information rather than writing it to a file
file_imports = replace_imports(file)
if file_imports:
# Use relative file path as key
relative_path = os.path.relpath(file, _DOCS_DIR)
doc_url = f"https://python.langchain.com/docs/{relative_path.replace('.mdx', '').replace('.md', '')}"
for import_info in file_imports:
doc_title = import_info["title"]
class_name = import_info["imported"]
if class_name not in global_imports:
global_imports[class_name] = {}
global_imports[class_name][doc_title] = doc_url
# Write the global imports information to a JSON file
with _JSON_PATH.open("w") as f:
json.dump(global_imports, f)
def _get_doc_title(data: str, file_name: str) -> str:
try:
return re.findall(r"^#\s+(.*)", data, re.MULTILINE)[0]
except IndexError:
pass
# Parse the rst-style titles
try:
return re.findall(r"^(.*)\n=+\n", data, re.MULTILINE)[0]
except IndexError:
return file_name
def replace_imports(file):
"""Replace imports in each Python code block with links to their documentation and append the import info in a comment"""
all_imports = []
with open(file, "r") as f:
data = f.read()
file_name = os.path.basename(file)
_DOC_TITLE = _get_doc_title(data, file_name)
def replacer(match):
# Extract the code block content
code = match.group(2)
# Replace if any import comment exists
# TODO: Use our own custom <code> component rather than this
# injection method
existing_comment_re = re.compile(r"^<!--IMPORTS:.*?-->\n", re.MULTILINE)
code = existing_comment_re.sub("", code)
# Process imports in the code block
imports = []
for import_match in _IMPORT_RE.finditer(code):
class_name = import_match.group(4)
try:
module_path = get_full_module_name(import_match.group(2), class_name)
except AttributeError as e:
logger.warning(f"Could not find module for {class_name}, {e}")
continue
except ImportError as e:
# Some CentOS OpenSSL issues can cause this to fail
logger.warning(f"Failed to load for class {class_name}, {e}")
continue
url = (
_BASE_URL
+ "/"
+ module_path.split(".")[1]
+ "/"
+ module_path
+ "."
+ class_name
+ ".html"
)
# Add the import information to our list
imports.append(
{
"imported": class_name,
"source": import_match.group(2),
"docs": url,
"title": _DOC_TITLE,
}
)
if imports:
all_imports.extend(imports)
# Create a unique comment containing the import information
import_comment = f"<!--IMPORTS:{json.dumps(imports)}-->"
# Inject the import comment at the start of the code block
return match.group(1) + import_comment + "\n" + code + match.group(3)
else:
# If there are no imports, return the original match
return match.group(0)
# Use re.sub to replace each Python code block
data = code_block_re.sub(replacer, data)
with open(file, "w") as f:
f.write(data)
return all_imports
if __name__ == "__main__":
main()

View File

@@ -21,7 +21,7 @@ function Imports({ imports }) {
</h4>
<ul style={{ paddingBottom: "1rem" }}>
{imports.map(({ imported, source, docs }) => (
<li>
<li key={imported}>
<a href={docs}>
<span>{imported}</span>
</a>{" "}
@@ -34,14 +34,25 @@ function Imports({ imports }) {
}
export default function CodeBlockWrapper({ children, ...props }) {
// Initialize imports as an empty array
let imports = [];
// Check if children is a string
if (typeof children === "string") {
return <CodeBlock {...props}>{children}</CodeBlock>;
// Search for an IMPORTS comment in the code
const match = /<!--IMPORTS:(.*?)-->\n/.exec(children);
if (match) {
imports = JSON.parse(match[1]);
children = children.replace(match[0], "");
}
} else if (children.imports) {
imports = children.imports;
}
return (
<>
<CodeBlock {...props}>{children.content}</CodeBlock>
<Imports imports={children.imports} />
<CodeBlock {...props}>{children}</CodeBlock>
{imports.length > 0 && <Imports imports={imports} />}
</>
);
}
}

View File

@@ -1,10 +1,32 @@
#!/bin/bash
### See: https://github.com/urllib3/urllib3/issues/2168
# Requests lib breaks for old SSL versions,
# which are defaults on Amazon Linux 2 (which Vercel uses for builds)
yum -y update
yum remove openssl-devel -y
yum install gcc bzip2-devel libffi-devel zlib-devel wget tar -y
yum install openssl11 -y
yum install openssl11-devel -y
# Install python 3.11 to connect with openSSL 1.1.1
wget https://www.python.org/ftp/python/3.11.4/Python-3.11.4.tgz
tar xzf Python-3.11.4.tgz
cd Python-3.11.4
./configure
make altinstall
# Check python version
echo "Python Version"
python3.11 --version
cd ..
python3 --version
python3 -m venv .venv
###
# Install nbdev and generate docs
cd ..
python3.11 -m venv .venv
source .venv/bin/activate
python3 -m pip install -r vercel_requirements.txt
python3.11 -m pip install --upgrade pip
python3.11 -m pip install -r vercel_requirements.txt
cp -r extras/* docs_skeleton/docs
cd docs_skeleton
nbdoc_build
python3.11 generate_api_reference_links.py