mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-23 07:09:31 +00:00
infra: lint new docs to match doc loader template (#22867)
This commit is contained in:
parent
8bd368d07e
commit
f9a6d5c845
@ -17,6 +17,18 @@ CHAT_MODEL_HEADERS = (
|
||||
)
|
||||
CHAT_MODEL_REGEX = r".*".join(CHAT_MODEL_HEADERS)
|
||||
|
||||
# Section headers a document loader integration page must contain,
# listed in the order they have to appear in the page.
DOCUMENT_LOADER_HEADERS = (
    "## Overview",
    "### Integration details",
    "### Loader features",
    "## Setup",
    "## Instantiation",
    "## Load",
    "## Lazy Load",
    "## API reference",
)
# Joining with ".*" yields a pattern that matches the headers in sequence
# with arbitrary text between them (the search passes re.DOTALL so the
# gaps can span newlines).
DOCUMENT_LOADER_REGEX = r".*".join(DOCUMENT_LOADER_HEADERS)
|
||||
|
||||
|
||||
def check_chat_model(path: Path) -> None:
|
||||
with open(path, "r") as f:
|
||||
@ -29,12 +41,29 @@ def check_chat_model(path: Path) -> None:
|
||||
)
|
||||
|
||||
|
||||
def check_document_loader(path: Path) -> None:
    """Check that a document loader docs page follows the page template.

    The whole file is read and searched with ``DOCUMENT_LOADER_REGEX``, which
    requires the template's section headers to appear in order.

    Args:
        path: Location of the docs page to validate.

    Raises:
        ValueError: If the page content does not match the template pattern.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # locale encoding of the machine running the check.
    with open(path, "r", encoding="utf-8") as f:
        doc = f.read()
    # re.DOTALL lets the ".*" gaps in the pattern span line breaks.
    if not re.search(DOCUMENT_LOADER_REGEX, doc, re.DOTALL):
        raise ValueError(
            f"Document {path} does not match the DocumentLoader Integration page template. "
            "Please see https://github.com/langchain-ai/langchain/issues/22866 for "
            "instructions on how to correctly format a DocumentLoader Integration page."
        )
|
||||
|
||||
|
||||
def main(*new_doc_paths: Union[str, Path]) -> None:
    """Run the matching template check on each given docs page.

    Each path is resolved to an absolute path and dispatched by location:
    pages under ``docs/integrations/chat`` go to ``check_chat_model`` and
    pages under ``docs/integrations/document_loaders`` go to
    ``check_document_loader`` (both directories relative to
    ``CURR_DIR.parent``). Paths outside those directories are ignored.

    Args:
        *new_doc_paths: Paths of the docs pages to check.
    """
    for raw_path in new_doc_paths:
        doc_path = Path(raw_path).resolve().absolute()
        ancestors = doc_path.parents
        integrations = CURR_DIR.parent / "docs" / "integrations"

        if integrations / "chat" in ancestors:
            print(f"Checking chat model page {doc_path}")
            check_chat_model(doc_path)
        elif integrations / "document_loaders" in ancestors:
            print(f"Checking document loader page {doc_path}")
            check_document_loader(doc_path)
        # Any other location has no template to enforce; move on.
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user