cli: standard tests in cli, test that they run, skip vectorstore tests (#28521)

Erick Friis 2024-12-05 00:38:32 -08:00 committed by GitHub
parent c5acedddc2
commit 43c35d19d4
36 changed files with 1573 additions and 631 deletions


@@ -272,6 +272,7 @@ if __name__ == "__main__":
# TODO: update to include all packages that rely on standard-tests (all partner packages)
# note: won't run on external repo partners
dirs_to_run["lint"].add("libs/standard-tests")
dirs_to_run["test"].add("libs/standard-tests")
dirs_to_run["test"].add("libs/partners/mistralai")
dirs_to_run["test"].add("libs/partners/openai")
dirs_to_run["test"].add("libs/partners/anthropic")
@@ -279,8 +280,9 @@
dirs_to_run["test"].add("libs/partners/groq")
elif file.startswith("libs/cli"):
# todo: add cli makefile
pass
dirs_to_run["lint"].add("libs/cli")
dirs_to_run["test"].add("libs/cli")
elif file.startswith("libs/partners"):
partner_dir = file.split("/")[2]
if os.path.isdir(f"libs/partners/{partner_dir}") and [
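For context, a minimal sketch of how a dirs_to_run mapping of this shape could be serialized for a CI job matrix (hypothetical consumer code; the script's real output step is outside this hunk):

import json

# dirs_to_run maps a job name to the set of directories it should cover,
# mirroring the dict-of-sets updates shown above.
dirs_to_run = {
    "lint": {"libs/standard-tests", "libs/cli"},
    "test": {"libs/standard-tests", "libs/cli"},
}
# Sets are not JSON-serializable, so sort each one into a list first.
matrix = {job: sorted(dirs) for job, dirs in dirs_to_run.items()}
print(json.dumps(matrix))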

libs/cli/.gitignore vendored

@@ -158,3 +158,5 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
.integration_test


@@ -1,8 +1,47 @@
lint lint_diff:
poetry run poe lint
test:
poetry run poe test
######################
# LINTING AND FORMATTING
######################
format:
poetry run poe format
# Define a variable for Python and notebook files.
PYTHON_FILES=.
MYPY_CACHE=.mypy_cache
lint format: PYTHON_FILES=.
lint_diff format_diff: PYTHON_FILES=$(shell git diff --relative=libs/cli --name-only --diff-filter=d master | grep -E '\.py$$|\.ipynb$$')
lint_package: PYTHON_FILES=langchain_cli
lint_tests: PYTHON_FILES=tests
lint_tests: MYPY_CACHE=.mypy_cache_test
lint lint_diff lint_package lint_tests:
[ "$(PYTHON_FILES)" = "" ] || poetry run ruff check $(PYTHON_FILES)
[ "$(PYTHON_FILES)" = "" ] || poetry run ruff format $(PYTHON_FILES) --diff
[ "$(PYTHON_FILES)" = "" ] || mkdir -p $(MYPY_CACHE) && poetry run mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE)
format format_diff:
[ "$(PYTHON_FILES)" = "" ] || poetry run ruff format $(PYTHON_FILES)
[ "$(PYTHON_FILES)" = "" ] || poetry run ruff check --select I --fix $(PYTHON_FILES)
test tests: _test _e2e_test
PYTHON = .venv/bin/python
_test:
poetry run pytest tests
# custom integration testing for cli integration flow
# currently ignores the vectorstores test because it lacks an implementation
_e2e_test:
rm -rf .integration_test
mkdir .integration_test
cd .integration_test && \
python3 -m venv .venv && \
$(PYTHON) -m pip install --upgrade poetry && \
$(PYTHON) -m pip install -e .. && \
$(PYTHON) -m langchain_cli.cli integration new --name parrot-link --name-class ParrotLink && \
cd langchain-parrot-link && \
poetry install --with lint,typing,test && \
poetry run pip install -e ../../../standard-tests && \
make format lint tests && \
poetry install --with test_integration && \
rm tests/integration_tests/test_vectorstores.py && \
make integration_test
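With this Makefile, make test runs both targets: _test executes the unit suite with pytest, while _e2e_test scaffolds a throwaway langchain-parrot-link package inside .integration_test, installs the local standard-tests package in editable mode, lints and tests the generated package, deletes the not-yet-implemented vectorstore integration test, and runs the remaining integration tests.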


@@ -1,3 +1,4 @@
# type: ignore
"""
Development Scripts for template packages
"""


@@ -33,13 +33,13 @@ lint_tests: PYTHON_FILES=tests
lint_tests: MYPY_CACHE=.mypy_cache_test
lint lint_diff lint_package lint_tests:
[ "$(PYTHON_FILES)" = "" ] || poetry run ruff $(PYTHON_FILES)
[ "$(PYTHON_FILES)" = "" ] || poetry run ruff check $(PYTHON_FILES)
[ "$(PYTHON_FILES)" = "" ] || poetry run ruff format $(PYTHON_FILES) --diff
[ "$(PYTHON_FILES)" = "" ] || mkdir -p $(MYPY_CACHE) && poetry run mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE)
format format_diff:
[ "$(PYTHON_FILES)" = "" ] || poetry run ruff format $(PYTHON_FILES)
[ "$(PYTHON_FILES)" = "" ] || poetry run ruff --select I --fix $(PYTHON_FILES)
[ "$(PYTHON_FILES)" = "" ] || poetry run ruff check --select I --fix $(PYTHON_FILES)
spell_check:
poetry run codespell --toml pyproject.toml


@@ -1,8 +1,11 @@
from importlib import metadata
from __module_name__.chat_models import Chat__ModuleName__
from __module_name__.document_loaders import __ModuleName__Loader
from __module_name__.embeddings import __ModuleName__Embeddings
from __module_name__.llms import __ModuleName__LLM
from __module_name__.retrievers import __ModuleName__Retriever
from __module_name__.toolkits import __ModuleName__Toolkit
from __module_name__.tools import __ModuleName__Tool
from __module_name__.vectorstores import __ModuleName__VectorStore
try:
@@ -14,8 +17,11 @@ del metadata # optional, avoids polluting the results of dir(__package__)
__all__ = [
"Chat__ModuleName__",
"__ModuleName__LLM",
"__ModuleName__VectorStore",
"__ModuleName__Embeddings",
"__ModuleName__Loader",
"__ModuleName__Retriever",
"__ModuleName__Toolkit",
"__ModuleName__Tool",
"__version__",
]


@@ -1,13 +1,19 @@
"""__ModuleName__ chat models."""
from typing import Any, List, Optional
from typing import Any, Dict, Iterator, List, Optional
from langchain_core.callbacks import (
CallbackManagerForLLMRun,
)
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.messages import BaseMessage
from langchain_core.outputs import ChatResult
from langchain_core.language_models import BaseChatModel
from langchain_core.messages import (
AIMessage,
AIMessageChunk,
BaseMessage,
)
from langchain_core.messages.ai import UsageMetadata
from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult
from pydantic import Field
class Chat__ModuleName__(BaseChatModel):
@@ -15,6 +21,8 @@ class Chat__ModuleName__(BaseChatModel):
# https://github.com/langchain-ai/langchain/blob/7ff05357bac6eaedf5058a2af88f23a1817d40fe/libs/partners/openai/langchain_openai/chat_models/base.py#L1120
"""__ModuleName__ chat model integration.
The default implementation echoes the first `parrot_buffer_length` characters of the input.
# TODO: Replace with relevant packages, env vars.
Setup:
Install ``__package_name__`` and set environment variable ``__MODULE_NAME___API_KEY``.
@@ -258,7 +266,36 @@ class Chat__ModuleName__(BaseChatModel):
""" # noqa: E501
# TODO: This method must be implemented to generate chat responses.
model_name: str = Field(alias="model")
"""The name of the model"""
parrot_buffer_length: int
"""The number of characters from the last message of the prompt to be echoed."""
temperature: Optional[float] = None
max_tokens: Optional[int] = None
timeout: Optional[int] = None
stop: Optional[List[str]] = None
max_retries: int = 2
@property
def _llm_type(self) -> str:
"""Return type of chat model."""
return "chat-__package_name_short__"
@property
def _identifying_params(self) -> Dict[str, Any]:
"""Return a dictionary of identifying parameters.
This information is used by the LangChain callback system, which
is used for tracing purposes, making it possible to monitor LLMs.
"""
return {
# The model name allows users to specify custom token counting
# rules in LLM monitoring applications (e.g., in LangSmith users
# can provide per token pricing for their model and monitor
# costs for the given LLM.)
"model_name": self.model_name,
}
def _generate(
self,
messages: List[BaseMessage],
@@ -266,16 +303,101 @@ class Chat__ModuleName__(BaseChatModel):
run_manager: Optional[CallbackManagerForLLMRun] = None,
**kwargs: Any,
) -> ChatResult:
raise NotImplementedError()
"""Override the _generate method to implement the chat model logic.
# TODO: Implement if Chat__ModuleName__ supports streaming. Otherwise delete method.
# def _stream(
# self,
# messages: List[BaseMessage],
# stop: Optional[List[str]] = None,
# run_manager: Optional[CallbackManagerForLLMRun] = None,
# **kwargs: Any,
# ) -> Iterator[ChatGenerationChunk]:
This can be a call to an API, a call to a local model, or any other
implementation that generates a response to the input prompt.
Args:
messages: the prompt composed of a list of messages.
stop: a list of strings on which the model should stop generating.
If generation stops due to a stop token, the stop token itself
SHOULD BE INCLUDED as part of the output. This is not enforced
across models right now, but it's a good practice to follow since
it makes it much easier to parse the output of the model
downstream and understand why generation stopped.
run_manager: A run manager with callbacks for the LLM.
"""
# Replace this with actual logic to generate a response from a list
# of messages.
last_message = messages[-1]
tokens = last_message.content[: self.parrot_buffer_length]
ct_input_tokens = sum(len(message.content) for message in messages)
ct_output_tokens = len(tokens)
message = AIMessage(
content=tokens,
additional_kwargs={}, # Used to add additional payload to the message
response_metadata={ # Use for response metadata
"time_in_seconds": 3,
},
usage_metadata={
"input_tokens": ct_input_tokens,
"output_tokens": ct_output_tokens,
"total_tokens": ct_input_tokens + ct_output_tokens,
},
)
generation = ChatGeneration(message=message)
return ChatResult(generations=[generation])
def _stream(
self,
messages: List[BaseMessage],
stop: Optional[List[str]] = None,
run_manager: Optional[CallbackManagerForLLMRun] = None,
**kwargs: Any,
) -> Iterator[ChatGenerationChunk]:
"""Stream the output of the model.
This method should be implemented if the model can generate output
in a streaming fashion. If the model does not support streaming,
do not implement it. In that case streaming requests will be automatically
handled by the _generate method.
Args:
messages: the prompt composed of a list of messages.
stop: a list of strings on which the model should stop generating.
If generation stops due to a stop token, the stop token itself
SHOULD BE INCLUDED as part of the output. This is not enforced
across models right now, but it's a good practice to follow since
it makes it much easier to parse the output of the model
downstream and understand why generation stopped.
run_manager: A run manager with callbacks for the LLM.
"""
last_message = messages[-1]
tokens = str(last_message.content[: self.parrot_buffer_length])
ct_input_tokens = sum(len(message.content) for message in messages)
for token in tokens:
usage_metadata = UsageMetadata(
{
"input_tokens": ct_input_tokens,
"output_tokens": 1,
"total_tokens": ct_input_tokens + 1,
}
)
ct_input_tokens = 0
chunk = ChatGenerationChunk(
message=AIMessageChunk(content=token, usage_metadata=usage_metadata)
)
if run_manager:
# This is optional in newer versions of LangChain
# The on_llm_new_token will be called automatically
run_manager.on_llm_new_token(token, chunk=chunk)
yield chunk
# Let's add some other information (e.g., response metadata)
chunk = ChatGenerationChunk(
message=AIMessageChunk(content="", response_metadata={"time_in_sec": 3})
)
if run_manager:
# This is optional in newer versions of LangChain
# The on_llm_new_token will be called automatically
run_manager.on_llm_new_token("", chunk=chunk)
yield chunk
# TODO: Implement if Chat__ModuleName__ supports async streaming. Otherwise delete.
# async def _astream(
@@ -294,8 +416,3 @@ class Chat__ModuleName__(BaseChatModel):
# run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
# **kwargs: Any,
# ) -> ChatResult:
@property
def _llm_type(self) -> str:
"""Return type of chat model."""
return "chat-__package_name_short__"


@@ -8,7 +8,8 @@ class __ModuleName__Embeddings(Embeddings):
# TODO: Replace with relevant packages, env vars.
Setup:
Install ``__package_name__`` and set environment variable ``__MODULE_NAME___API_KEY``.
Install ``__package_name__`` and set environment variable
``__MODULE_NAME___API_KEY``.
.. code-block:: bash
@@ -70,21 +71,26 @@ class __ModuleName__Embeddings(Embeddings):
"""
def __init__(self, model: str):
self.model = model
def embed_documents(self, texts: List[str]) -> List[List[float]]:
"""Embed search docs."""
raise NotImplementedError
return [[0.5, 0.6, 0.7] for _ in texts]
def embed_query(self, text: str) -> List[float]:
"""Embed query text."""
raise NotImplementedError
return self.embed_documents([text])[0]
# only keep aembed_documents and aembed_query if they're implemented!
# delete them otherwise to use the base class' default
# implementation, which calls the sync version in an executor
async def aembed_documents(self, texts: List[str]) -> List[List[float]]:
"""Asynchronous Embed search docs."""
raise NotImplementedError
# optional: add custom async implementations here
# you can also delete these, and the base class will
# use the default implementation, which calls the sync
# version in an async executor:
async def aembed_query(self, text: str) -> List[float]:
"""Asynchronous Embed query text."""
raise NotImplementedError
# async def aembed_documents(self, texts: List[str]) -> List[List[float]]:
# """Asynchronous Embed search docs."""
# ...
# async def aembed_query(self, text: str) -> List[float]:
# """Asynchronous Embed query text."""
# ...
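A quick check of the stubbed behavior above (assuming a rendered package like the e2e test's; the model name matches the test parameters later in this commit):

from langchain_parrot_link.embeddings import ParrotLinkEmbeddings  # hypothetical rendered name

emb = ParrotLinkEmbeddings(model="nest-embed-001")
assert emb.embed_documents(["a", "b"]) == [[0.5, 0.6, 0.7], [0.5, 0.6, 0.7]]
assert emb.embed_query("a") == [0.5, 0.6, 0.7]  # delegates to embed_documents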


@@ -1,155 +0,0 @@
"""__ModuleName__ large language models."""
from typing import (
Any,
List,
Optional,
)
from langchain_core.callbacks import (
CallbackManagerForLLMRun,
)
from langchain_core.language_models import BaseLLM
from langchain_core.outputs import LLMResult
class __ModuleName__LLM(BaseLLM):
"""__ModuleName__ completion model integration.
# TODO: Replace with relevant packages, env vars.
Setup:
Install ``__package_name__`` and set environment variable ``__MODULE_NAME___API_KEY``.
.. code-block:: bash
pip install -U __package_name__
export __MODULE_NAME___API_KEY="your-api-key"
# TODO: Populate with relevant params.
Key init args completion params:
model: str
Name of __ModuleName__ model to use.
temperature: float
Sampling temperature.
max_tokens: Optional[int]
Max number of tokens to generate.
# TODO: Populate with relevant params.
Key init args client params:
timeout: Optional[float]
Timeout for requests.
max_retries: int
Max number of retries.
api_key: Optional[str]
__ModuleName__ API key. If not passed in will be read from env var __MODULE_NAME___API_KEY.
See full list of supported init args and their descriptions in the params section.
# TODO: Replace with relevant init params.
Instantiate:
.. code-block:: python
from __module_name__ import __ModuleName__LLM
llm = __ModuleName__LLM(
model="...",
temperature=0,
max_tokens=None,
timeout=None,
max_retries=2,
# api_key="...",
# other params...
)
Invoke:
.. code-block:: python
input_text = "The meaning of life is "
llm.invoke(input_text)
.. code-block:: python
# TODO: Example output.
# TODO: Delete if token-level streaming isn't supported.
Stream:
.. code-block:: python
for chunk in llm.stream(input_text):
print(chunk)
.. code-block:: python
# TODO: Example output.
.. code-block:: python
''.join(llm.stream(input_text))
.. code-block:: python
# TODO: Example output.
# TODO: Delete if native async isn't supported.
Async:
.. code-block:: python
await llm.ainvoke(input_text)
# stream:
# async for chunk in (await llm.astream(input_text))
# batch:
# await llm.abatch([input_text])
.. code-block:: python
# TODO: Example output.
"""
# TODO: This method must be implemented to generate text completions.
def _generate(
self,
prompts: List[str],
stop: Optional[List[str]] = None,
run_manager: Optional[CallbackManagerForLLMRun] = None,
**kwargs: Any,
) -> LLMResult:
raise NotImplementedError
# TODO: Implement if __ModuleName__LLM supports async generation. Otherwise
# delete method.
# async def _agenerate(
# self,
# prompts: List[str],
# stop: Optional[List[str]] = None,
# run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
# **kwargs: Any,
# ) -> LLMResult:
# raise NotImplementedError
# TODO: Implement if __ModuleName__LLM supports streaming. Otherwise delete method.
# def _stream(
# self,
# prompt: str,
# stop: Optional[List[str]] = None,
# run_manager: Optional[CallbackManagerForLLMRun] = None,
# **kwargs: Any,
# ) -> Iterator[GenerationChunk]:
# raise NotImplementedError
# TODO: Implement if __ModuleName__LLM supports async streaming. Otherwise delete
# method.
# async def _astream(
# self,
# prompt: str,
# stop: Optional[List[str]] = None,
# run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
# **kwargs: Any,
# ) -> AsyncIterator[GenerationChunk]:
# raise NotImplementedError
@property
def _llm_type(self) -> str:
"""Return type of LLM."""
return "__package_name_short__-llm"


@@ -1,7 +1,8 @@
"""__ModuleName__ retrievers."""
from typing import List
from typing import Any, List
from langchain_core.callbacks import CallbackManagerForRetrieverRun
from langchain_core.documents import Document
from langchain_core.retrievers import BaseRetriever
@@ -13,7 +14,8 @@ class __ModuleName__Retriever(BaseRetriever):
# TODO: Replace with relevant packages, env vars, etc.
Setup:
Install ``__package_name__`` and set environment variable ``__MODULE_NAME___API_KEY``.
Install ``__package_name__`` and set environment variable
``__MODULE_NAME___API_KEY``.
.. code-block:: bash
@@ -82,8 +84,24 @@ class __ModuleName__Retriever(BaseRetriever):
# TODO: Example output.
""" # noqa: E501
"""
k: int = 3
# TODO: This method must be implemented to retrieve documents.
def _get_relevant_documents(self, query: str) -> List[Document]:
raise NotImplementedError()
def _get_relevant_documents(
self, query: str, *, run_manager: CallbackManagerForRetrieverRun, **kwargs: Any
) -> List[Document]:
k = kwargs.get("k", self.k)
return [
Document(page_content=f"Result {i} for query: {query}") for i in range(k)
]
# optional: add custom async implementations here
# async def _aget_relevant_documents(
# self,
# query: str,
# *,
# run_manager: AsyncCallbackManagerForRetrieverRun,
# **kwargs: Any,
# ) -> List[Document]: ...
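Since the stub above fabricates k placeholder documents, a rendered instance behaves like this (hypothetical names, assuming the same parrot-link rendering as the e2e test):

from langchain_parrot_link.retrievers import ParrotLinkRetriever  # hypothetical rendered name

retriever = ParrotLinkRetriever(k=2)
docs = retriever.invoke("example query")
# [Document(page_content='Result 0 for query: example query'),
#  Document(page_content='Result 1 for query: example query')]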


@@ -2,10 +2,10 @@
from typing import List
from langchain_core.tools import BaseTool, BaseToolKit
from langchain_core.tools import BaseTool, BaseToolkit
class __ModuleName__Toolkit(BaseToolKit):
class __ModuleName__Toolkit(BaseToolkit):
# TODO: Replace all TODOs in docstring. See example docstring:
# https://github.com/langchain-ai/langchain/blob/c123cb2b304f52ab65db4714eeec46af69a861ec/libs/community/langchain_community/agent_toolkits/sql/toolkit.py#L19
"""__ModuleName__ toolkit.


@@ -6,10 +6,10 @@ from langchain_core.callbacks import (
CallbackManagerForToolRun,
)
from langchain_core.tools import BaseTool
from pydantic import BaseModel
from pydantic import BaseModel, Field
class __ModuleName__Input(BaseModel):
class __ModuleName__ToolInput(BaseModel):
"""Input schema for __ModuleName__ tool.
This docstring is **not** part of what is sent to the model when performing tool
@@ -18,12 +18,11 @@ class __ModuleName__Input(BaseModel):
"""
# TODO: Add input args and descriptions.
# a: int = Field(..., description="first number")
# b: int = Field(0, description="second number")
...
a: int = Field(..., description="first number to add")
b: int = Field(..., description="second number to add")
class __ModuleName__Tool(BaseTool):
class __ModuleName__Tool(BaseTool): # type: ignore[override]
"""__ModuleName__ tool.
Setup:
@@ -69,24 +68,26 @@ class __ModuleName__Tool(BaseTool):
"""The name that is passed to the model when performing tool calling."""
description: str = "TODO: Tool description."
"""The description that is passed to the model when performing tool calling."""
args_schema: Type[BaseModel] = __ModuleName__Input
args_schema: Type[BaseModel] = __ModuleName__ToolInput
"""The schema that is passed to the model when performing tool calling."""
# TODO: Add any other init params for the tool.
# param1: Optional[str]
# """param1 determines foobar"""
# TODO: Replaced *args with real tool arguments.
# TODO: Replace (a, b) with real tool arguments.
def _run(
self, *args, run_manager: Optional[CallbackManagerForToolRun] = None
self, a: int, b: int, *, run_manager: Optional[CallbackManagerForToolRun] = None
) -> str:
raise NotImplementedError
return str(a + b + 80)
# TODO: Implement if tool has native async functionality, otherwise delete.
# async def _arun(
# self,
# *args,
# a: int,
# b: int,
# *,
# run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
# ) -> str:
# ...
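Given the stub above, a rendered tool can be exercised like this (hypothetical names; the fixed +80 offset makes it easy to tell that the stub, rather than a real calculator, produced the answer):

from langchain_parrot_link.tools import ParrotMultiplyTool  # hypothetical rendered name

tool = ParrotMultiplyTool()
result = tool.invoke({"a": 2, "b": 3})
# result == "85" because _run returns str(a + b + 80)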


@@ -2,8 +2,6 @@
from __future__ import annotations
import asyncio
from functools import partial
from typing import (
TYPE_CHECKING,
Any,
@@ -160,6 +158,8 @@ class __ModuleName__VectorStore(VectorStore):
""" # noqa: E501
_database: dict[str, tuple[Document, list[float]]] = {}
def add_texts(
self,
texts: Iterable[str],
@@ -168,65 +168,70 @@ class __ModuleName__VectorStore(VectorStore):
) -> List[str]:
raise NotImplementedError
async def aadd_texts(
self,
texts: Iterable[str],
metadatas: Optional[List[dict]] = None,
**kwargs: Any,
) -> List[str]:
return await asyncio.get_running_loop().run_in_executor(
None, partial(self.add_texts, **kwargs), texts, metadatas
)
# optional: add custom async implementations
# async def aadd_texts(
# self,
# texts: Iterable[str],
# metadatas: Optional[List[dict]] = None,
# **kwargs: Any,
# ) -> List[str]:
# return await asyncio.get_running_loop().run_in_executor(
# None, partial(self.add_texts, **kwargs), texts, metadatas
# )
def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> Optional[bool]:
raise NotImplementedError
async def adelete(
self, ids: Optional[List[str]] = None, **kwargs: Any
) -> Optional[bool]:
raise NotImplementedError
# optional: add custom async implementations
# async def adelete(
# self, ids: Optional[List[str]] = None, **kwargs: Any
# ) -> Optional[bool]:
# raise NotImplementedError
def similarity_search(
self, query: str, k: int = 4, **kwargs: Any
) -> List[Document]:
raise NotImplementedError
async def asimilarity_search(
self, query: str, k: int = 4, **kwargs: Any
) -> List[Document]:
# This is a temporary workaround to make the similarity search
# asynchronous. The proper solution is to make the similarity search
# asynchronous in the vector store implementations.
func = partial(self.similarity_search, query, k=k, **kwargs)
return await asyncio.get_event_loop().run_in_executor(None, func)
# optional: add custom async implementations
# async def asimilarity_search(
# self, query: str, k: int = 4, **kwargs: Any
# ) -> List[Document]:
# # This is a temporary workaround to make the similarity search
# # asynchronous. The proper solution is to make the similarity search
# # asynchronous in the vector store implementations.
# func = partial(self.similarity_search, query, k=k, **kwargs)
# return await asyncio.get_event_loop().run_in_executor(None, func)
def similarity_search_with_score(
self, *args: Any, **kwargs: Any
) -> List[Tuple[Document, float]]:
raise NotImplementedError
async def asimilarity_search_with_score(
self, *args: Any, **kwargs: Any
) -> List[Tuple[Document, float]]:
# This is a temporary workaround to make the similarity search
# asynchronous. The proper solution is to make the similarity search
# asynchronous in the vector store implementations.
func = partial(self.similarity_search_with_score, *args, **kwargs)
return await asyncio.get_event_loop().run_in_executor(None, func)
# optional: add custom async implementations
# async def asimilarity_search_with_score(
# self, *args: Any, **kwargs: Any
# ) -> List[Tuple[Document, float]]:
# # This is a temporary workaround to make the similarity search
# # asynchronous. The proper solution is to make the similarity search
# # asynchronous in the vector store implementations.
# func = partial(self.similarity_search_with_score, *args, **kwargs)
# return await asyncio.get_event_loop().run_in_executor(None, func)
def similarity_search_by_vector(
self, embedding: List[float], k: int = 4, **kwargs: Any
) -> List[Document]:
raise NotImplementedError
async def asimilarity_search_by_vector(
self, embedding: List[float], k: int = 4, **kwargs: Any
) -> List[Document]:
# This is a temporary workaround to make the similarity search
# asynchronous. The proper solution is to make the similarity search
# asynchronous in the vector store implementations.
func = partial(self.similarity_search_by_vector, embedding, k=k, **kwargs)
return await asyncio.get_event_loop().run_in_executor(None, func)
# optional: add custom async implementations
# async def asimilarity_search_by_vector(
# self, embedding: List[float], k: int = 4, **kwargs: Any
# ) -> List[Document]:
# # This is a temporary workaround to make the similarity search
# # asynchronous. The proper solution is to make the similarity search
# # asynchronous in the vector store implementations.
# func = partial(self.similarity_search_by_vector, embedding, k=k, **kwargs)
# return await asyncio.get_event_loop().run_in_executor(None, func)
def max_marginal_relevance_search(
self,
@@ -238,26 +243,27 @@ class __ModuleName__VectorStore(VectorStore):
) -> List[Document]:
raise NotImplementedError
async def amax_marginal_relevance_search(
self,
query: str,
k: int = 4,
fetch_k: int = 20,
lambda_mult: float = 0.5,
**kwargs: Any,
) -> List[Document]:
# This is a temporary workaround to make the similarity search
# asynchronous. The proper solution is to make the similarity search
# asynchronous in the vector store implementations.
func = partial(
self.max_marginal_relevance_search,
query,
k=k,
fetch_k=fetch_k,
lambda_mult=lambda_mult,
**kwargs,
)
return await asyncio.get_event_loop().run_in_executor(None, func)
# optional: add custom async implementations
# async def amax_marginal_relevance_search(
# self,
# query: str,
# k: int = 4,
# fetch_k: int = 20,
# lambda_mult: float = 0.5,
# **kwargs: Any,
# ) -> List[Document]:
# # This is a temporary workaround to make the similarity search
# # asynchronous. The proper solution is to make the similarity search
# # asynchronous in the vector store implementations.
# func = partial(
# self.max_marginal_relevance_search,
# query,
# k=k,
# fetch_k=fetch_k,
# lambda_mult=lambda_mult,
# **kwargs,
# )
# return await asyncio.get_event_loop().run_in_executor(None, func)
def max_marginal_relevance_search_by_vector(
self,
@@ -269,15 +275,16 @@ class __ModuleName__VectorStore(VectorStore):
) -> List[Document]:
raise NotImplementedError
async def amax_marginal_relevance_search_by_vector(
self,
embedding: List[float],
k: int = 4,
fetch_k: int = 20,
lambda_mult: float = 0.5,
**kwargs: Any,
) -> List[Document]:
raise NotImplementedError
# optional: add custom async implementations
# async def amax_marginal_relevance_search_by_vector(
# self,
# embedding: List[float],
# k: int = 4,
# fetch_k: int = 20,
# lambda_mult: float = 0.5,
# **kwargs: Any,
# ) -> List[Document]:
# raise NotImplementedError
@classmethod
def from_texts(
@@ -289,17 +296,18 @@ class __ModuleName__VectorStore(VectorStore):
) -> VST:
raise NotImplementedError
@classmethod
async def afrom_texts(
cls: Type[VST],
texts: List[str],
embedding: Embeddings,
metadatas: Optional[List[dict]] = None,
**kwargs: Any,
) -> VST:
return await asyncio.get_running_loop().run_in_executor(
None, partial(cls.from_texts, **kwargs), texts, embedding, metadatas
)
# optional: add custom async implementations
# @classmethod
# async def afrom_texts(
# cls: Type[VST],
# texts: List[str],
# embedding: Embeddings,
# metadatas: Optional[List[dict]] = None,
# **kwargs: Any,
# ) -> VST:
# return await asyncio.get_running_loop().run_in_executor(
# None, partial(cls.from_texts, **kwargs), texts, embedding, metadatas
# )
def _select_relevance_score_fn(self) -> Callable[[float], float]:
raise NotImplementedError


@@ -1,5 +1,5 @@
[build-system]
requires = [ "poetry-core>=1.0.0",]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
[tool.poetry]
@@ -23,14 +23,16 @@ python = ">=3.9,<4.0"
langchain-core = "^0.3.15"
[tool.ruff.lint]
select = [ "E", "F", "I", "T201",]
select = ["E", "F", "I", "T201"]
[tool.coverage.run]
omit = [ "tests/*",]
omit = ["tests/*"]
[tool.pytest.ini_options]
addopts = "--strict-markers --strict-config --durations=5"
markers = [ "compile: mark placeholder test used to compile integration tests without running them",]
markers = [
"compile: mark placeholder test used to compile integration tests without running them",
]
asyncio_mode = "auto"
[tool.poetry.group.test]
@@ -48,11 +50,14 @@ optional = true
[tool.poetry.group.dev]
optional = true
[tool.poetry.group.dev.dependencies]
[tool.poetry.group.test.dependencies]
pytest = "^7.4.3"
pytest-asyncio = "^0.23.2"
pytest-socket = "^0.7.0"
pytest-watcher = "^0.3.4"
langchain-tests = "^0.3.4"
[tool.poetry.group.codespell.dependencies]
codespell = "^2.2.6"
@@ -64,15 +69,3 @@ ruff = "^0.5"
[tool.poetry.group.typing.dependencies]
mypy = "^1.10"
[tool.poetry.group.test.dependencies.langchain-core]
path = "../../core"
develop = true
[tool.poetry.group.dev.dependencies.langchain-core]
path = "../../core"
develop = true
[tool.poetry.group.typing.dependencies.langchain-core]
path = "../../core"
develop = true


@@ -1,64 +1,21 @@
"""Test Chat__ModuleName__ chat model."""
from typing import Type
from __module_name__.chat_models import Chat__ModuleName__
from langchain_tests.integration_tests import ChatModelIntegrationTests
def test_stream() -> None:
"""Test streaming tokens from OpenAI."""
llm = Chat__ModuleName__()
class TestChatParrotLinkIntegration(ChatModelIntegrationTests):
@property
def chat_model_class(self) -> Type[Chat__ModuleName__]:
return Chat__ModuleName__
for token in llm.stream("I'm Pickle Rick"):
assert isinstance(token.content, str)
async def test_astream() -> None:
"""Test streaming tokens from OpenAI."""
llm = Chat__ModuleName__()
async for token in llm.astream("I'm Pickle Rick"):
assert isinstance(token.content, str)
async def test_abatch() -> None:
"""Test streaming tokens from Chat__ModuleName__."""
llm = Chat__ModuleName__()
result = await llm.abatch(["I'm Pickle Rick", "I'm not Pickle Rick"])
for token in result:
assert isinstance(token.content, str)
async def test_abatch_tags() -> None:
"""Test batch tokens from Chat__ModuleName__."""
llm = Chat__ModuleName__()
result = await llm.abatch(
["I'm Pickle Rick", "I'm not Pickle Rick"], config={"tags": ["foo"]}
)
for token in result:
assert isinstance(token.content, str)
def test_batch() -> None:
"""Test batch tokens from Chat__ModuleName__."""
llm = Chat__ModuleName__()
result = llm.batch(["I'm Pickle Rick", "I'm not Pickle Rick"])
for token in result:
assert isinstance(token.content, str)
async def test_ainvoke() -> None:
"""Test invoke tokens from Chat__ModuleName__."""
llm = Chat__ModuleName__()
result = await llm.ainvoke("I'm Pickle Rick", config={"tags": ["foo"]})
assert isinstance(result.content, str)
def test_invoke() -> None:
"""Test invoke tokens from Chat__ModuleName__."""
llm = Chat__ModuleName__()
result = llm.invoke("I'm Pickle Rick", config=dict(tags=["foo"]))
assert isinstance(result.content, str)
@property
def chat_model_params(self) -> dict:
# These should be parameters used to initialize your integration for testing
return {
"model": "bird-brain-001",
"temperature": 0,
"parrot_buffer_length": 50,
}
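Subclassing ChatModelIntegrationTests is all the wiring required: pytest collects the test methods inherited from the standard suite, so running pytest against tests/integration_tests exercises invocation, batching, and streaming for the model configured by the two properties above.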


@@ -1,20 +1,16 @@
"""Test __ModuleName__ embeddings."""
from typing import Type
from __module_name__.embeddings import __ModuleName__Embeddings
from langchain_tests.integration_tests import EmbeddingsIntegrationTests
def test___module_name___embedding_documents() -> None:
"""Test cohere embeddings."""
documents = ["foo bar"]
embedding = __ModuleName__Embeddings()
output = embedding.embed_documents(documents)
assert len(output) == 1
assert len(output[0]) > 0
class TestParrotLinkEmbeddingsIntegration(EmbeddingsIntegrationTests):
@property
def embeddings_class(self) -> Type[__ModuleName__Embeddings]:
return __ModuleName__Embeddings
def test___module_name___embedding_query() -> None:
"""Test cohere embeddings."""
document = "foo bar"
embedding = __ModuleName__Embeddings()
output = embedding.embed_query(document)
assert len(output) > 0
@property
def embedding_model_params(self) -> dict:
return {"model": "nest-embed-001"}


@@ -1,64 +0,0 @@
"""Test __ModuleName__LLM llm."""
from __module_name__.llms import __ModuleName__LLM
def test_stream() -> None:
"""Test streaming tokens from OpenAI."""
llm = __ModuleName__LLM()
for token in llm.stream("I'm Pickle Rick"):
assert isinstance(token, str)
async def test_astream() -> None:
"""Test streaming tokens from OpenAI."""
llm = __ModuleName__LLM()
async for token in llm.astream("I'm Pickle Rick"):
assert isinstance(token, str)
async def test_abatch() -> None:
"""Test streaming tokens from __ModuleName__LLM."""
llm = __ModuleName__LLM()
result = await llm.abatch(["I'm Pickle Rick", "I'm not Pickle Rick"])
for token in result:
assert isinstance(token, str)
async def test_abatch_tags() -> None:
"""Test batch tokens from __ModuleName__LLM."""
llm = __ModuleName__LLM()
result = await llm.abatch(
["I'm Pickle Rick", "I'm not Pickle Rick"], config={"tags": ["foo"]}
)
for token in result:
assert isinstance(token, str)
def test_batch() -> None:
"""Test batch tokens from __ModuleName__LLM."""
llm = __ModuleName__LLM()
result = llm.batch(["I'm Pickle Rick", "I'm not Pickle Rick"])
for token in result:
assert isinstance(token, str)
async def test_ainvoke() -> None:
"""Test invoke tokens from __ModuleName__LLM."""
llm = __ModuleName__LLM()
result = await llm.ainvoke("I'm Pickle Rick", config={"tags": ["foo"]})
assert isinstance(result, str)
def test_invoke() -> None:
"""Test invoke tokens from __ModuleName__LLM."""
llm = __ModuleName__LLM()
result = llm.invoke("I'm Pickle Rick", config=dict(tags=["foo"]))
assert isinstance(result, str)


@@ -0,0 +1,24 @@
from typing import Type
from __module_name__.retrievers import __ModuleName__Retriever
from langchain_tests.integration_tests import (
RetrieversIntegrationTests,
)
class Test__ModuleName__Retriever(RetrieversIntegrationTests):
@property
def retriever_constructor(self) -> Type[__ModuleName__Retriever]:
"""Get an empty vectorstore for unit tests."""
return __ModuleName__Retriever
@property
def retriever_constructor_params(self) -> dict:
return {"k": 2}
@property
def retriever_query_example(self) -> str:
"""
Returns a str representing the "query" of an example retriever call.
"""
return "example query"


@@ -0,0 +1,27 @@
from typing import Type
from __module_name__.tools import __ModuleName__Tool
from langchain_tests.integration_tests import ToolsIntegrationTests
class TestParrotMultiplyToolIntegration(ToolsIntegrationTests):
@property
def tool_constructor(self) -> Type[__ModuleName__Tool]:
return __ModuleName__Tool
@property
def tool_constructor_params(self) -> dict:
# if your tool constructor instead required initialization arguments like
# `def __init__(self, some_arg: int):`, you would return those here
# as a dictionary, e.g.: `return {'some_arg': 42}`
return {}
@property
def tool_invoke_params_example(self) -> dict:
"""
Returns a dictionary representing the "args" of an example tool call.
This should NOT be a ToolCall dict - i.e. it should not
have {"name", "id", "args"} keys.
"""
return {"a": 2, "b": 3}


@@ -0,0 +1,37 @@
from typing import AsyncGenerator, Generator
import pytest
from __module_name__.vectorstores import __ModuleName__VectorStore
from langchain_core.vectorstores import VectorStore
from langchain_tests.integration_tests import (
AsyncReadWriteTestSuite,
ReadWriteTestSuite,
)
class Test__ModuleName__VectorStoreSync(ReadWriteTestSuite):
@pytest.fixture()
def vectorstore(self) -> Generator[VectorStore, None, None]: # type: ignore
"""Get an empty vectorstore for unit tests."""
store = __ModuleName__VectorStore()
# note: store should be EMPTY at this point
# if you need to delete data, you may do so here
try:
yield store
finally:
# cleanup operations, or deleting data
pass
class Test__ModuleName__VectorStoreAsync(AsyncReadWriteTestSuite):
@pytest.fixture()
async def vectorstore(self) -> AsyncGenerator[VectorStore, None]: # type: ignore
"""Get an empty vectorstore for unit tests."""
store = __ModuleName__VectorStore()
# note: store should be EMPTY at this point
# if you need to delete data, you may do so here
try:
yield store
finally:
# cleanup operations, or deleting data
pass
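Note that __ModuleName__VectorStore still raises NotImplementedError for its core methods, which is why the CLI's _e2e_test Makefile target deletes tests/integration_tests/test_vectorstores.py before running make integration_test.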


@@ -1,8 +1,21 @@
"""Test chat model integration."""
from typing import Type
from __module_name__.chat_models import Chat__ModuleName__
from langchain_tests.unit_tests import ChatModelUnitTests
def test_initialization() -> None:
"""Test chat model initialization."""
Chat__ModuleName__()
class TestChat__ModuleName__Unit(ChatModelUnitTests):
@property
def chat_model_class(self) -> Type[Chat__ModuleName__]:
return Chat__ModuleName__
@property
def chat_model_params(self) -> dict:
# These should be parameters used to initialize your integration for testing
return {
"model": "bird-brain-001",
"temperature": 0,
"parrot_buffer_length": 50,
}


@@ -1,8 +1,16 @@
"""Test embedding model integration."""
from typing import Type
from __module_name__.embeddings import __ModuleName__Embeddings
from langchain_tests.unit_tests import EmbeddingsUnitTests
def test_initialization() -> None:
"""Test embedding model initialization."""
__ModuleName__Embeddings()
class TestParrotLinkEmbeddingsUnit(EmbeddingsUnitTests):
@property
def embeddings_class(self) -> Type[__ModuleName__Embeddings]:
return __ModuleName__Embeddings
@property
def embedding_model_params(self) -> dict:
return {"model": "nest-embed-001"}


@@ -1,12 +0,0 @@
from __module_name__ import __all__
EXPECTED_ALL = [
"__ModuleName__LLM",
"Chat__ModuleName__",
"__ModuleName__VectorStore",
"__ModuleName__Embeddings",
]
def test_all_imports() -> None:
assert sorted(EXPECTED_ALL) == sorted(__all__)


@@ -1,8 +0,0 @@
"""Test __ModuleName__ Chat API wrapper."""
from __module_name__ import __ModuleName__LLM
def test_initialization() -> None:
"""Test integration initialization."""
__ModuleName__LLM()


@@ -0,0 +1,27 @@
from typing import Type
from __module_name__.tools import __ModuleName__Tool
from langchain_tests.unit_tests import ToolsUnitTests
class TestParrotMultiplyToolUnit(ToolsUnitTests):
@property
def tool_constructor(self) -> Type[__ModuleName__Tool]:
return __ModuleName__Tool
@property
def tool_constructor_params(self) -> dict:
# if your tool constructor instead required initialization arguments like
# `def __init__(self, some_arg: int):`, you would return those here
# as a dictionary, e.g.: `return {'some_arg': 42}`
return {}
@property
def tool_invoke_params_example(self) -> dict:
"""
Returns a dictionary representing the "args" of an example tool call.
This should NOT be a ToolCall dict - i.e. it should not
have {"name", "id", "args"} keys.
"""
return {"a": 2, "b": 3}


@@ -1,6 +0,0 @@
from __module_name__.vectorstores import __ModuleName__VectorStore
def test_initialization() -> None:
"""Test integration vectorstore initialization."""
__ModuleName__VectorStore()


@@ -6,7 +6,7 @@ import re
import shutil
import subprocess
from pathlib import Path
from typing import Optional
from typing import Dict, Optional, cast
import typer
from typing_extensions import Annotated, TypedDict
@@ -15,19 +15,17 @@ from langchain_cli.utils.find_replace import replace_file, replace_glob
integration_cli = typer.Typer(no_args_is_help=True, add_completion=False)
Replacements = TypedDict(
"Replacements",
{
"__package_name__": str,
"__module_name__": str,
"__ModuleName__": str,
"__MODULE_NAME__": str,
"__package_name_short__": str,
},
)
class Replacements(TypedDict):
__package_name__: str
__module_name__: str
__ModuleName__: str
__MODULE_NAME__: str
__package_name_short__: str
__package_name_short_snake__: str
def _process_name(name: str, *, community: bool = False):
def _process_name(name: str, *, community: bool = False) -> Replacements:
preprocessed = name.replace("_", "-").lower()
if preprocessed.startswith("langchain-"):
@@ -42,7 +40,7 @@ def _process_name(name: str, *, community: bool = False):
raise ValueError("Name should not end with `-`.")
if preprocessed.find("--") != -1:
raise ValueError("Name should not contain consecutive hyphens.")
replacements = {
replacements: Replacements = {
"__package_name__": f"langchain-{preprocessed}",
"__module_name__": "langchain_" + preprocessed.replace("-", "_"),
"__ModuleName__": preprocessed.title().replace("-", ""),
@@ -52,7 +50,7 @@
}
if community:
replacements["__module_name__"] = preprocessed.replace("-", "_")
return Replacements(replacements)
return replacements
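A worked example of the mapping for --name parrot-link, traced by hand from the rules above (the keys this hunk truncates are filled in per the Replacements TypedDict; their exact construction is inferred, not shown in the diff):

replacements = {
    "__package_name__": "langchain-parrot-link",
    "__module_name__": "langchain_parrot_link",
    "__ModuleName__": "ParrotLink",  # "parrot-link".title().replace("-", "")
    "__MODULE_NAME__": "PARROT_LINK",  # inferred upper-case form
    "__package_name_short__": "parrot-link",  # inferred short form
    "__package_name_short_snake__": "parrot_link",  # inferred snake-case form
}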
@integration_cli.command()
@@ -74,16 +72,7 @@
):
"""
Creates a new integration package.
Should be run from libs/partners
"""
# confirm that we are in the right directory
if not Path.cwd().name == "partners" or not Path.cwd().parent.name == "libs":
typer.echo(
"This command should be run from the `libs/partners` directory in the "
"langchain-ai/langchain monorepo. Continuing is NOT recommended."
)
typer.confirm("Are you sure you want to continue?", abort=True)
try:
replacements = _process_name(name)
@@ -104,7 +93,7 @@
"Name of integration in PascalCase", default=replacements["__ModuleName__"]
)
destination_dir = Path.cwd() / replacements["__package_name_short__"]
destination_dir = Path.cwd() / replacements["__package_name__"]
if destination_dir.exists():
typer.echo(f"Folder {destination_dir} exists.")
raise typer.Exit(code=1)
@@ -118,7 +107,7 @@
shutil.move(destination_dir / "integration_template", package_dir)
# replacements in files
replace_glob(destination_dir, "**/*", replacements)
replace_glob(destination_dir, "**/*", cast(Dict[str, str], replacements))
# poetry install
subprocess.run(
@@ -226,4 +215,4 @@ def create_doc(
shutil.copy(docs_template, destination_path)
# replacements in file
replace_file(destination_path, replacements)
replace_file(destination_path, cast(Dict[str, str], replacements))


@@ -17,7 +17,7 @@ PARTNER_PKGS = PKGS_ROOT / "partners"
class ImportExtractor(ast.NodeVisitor):
def __init__(self, *, from_package: Optional[str] = None) -> None:
"""Extract all imports from the given code, optionally filtering by package."""
self.imports = []
self.imports: list = []
self.package = from_package
def visit_ImportFrom(self, node):
@@ -68,7 +68,7 @@ def find_subclasses_in_module(module, classes_: List[Type]) -> List[str]:
return subclasses
def _get_all_classnames_from_file(file: str, pkg: str) -> List[Tuple[str, str]]:
def _get_all_classnames_from_file(file: Path, pkg: str) -> List[Tuple[str, str]]:
"""Extract all class names from a file."""
with open(file, encoding="utf-8") as f:
code = f.read()
@@ -145,7 +145,7 @@ def find_imports_from_package(
return extractor.imports
def _get_current_module(path: str, pkg_root: str) -> str:
def _get_current_module(path: Path, pkg_root: str) -> str:
"""Convert a path to a module name."""
path_as_pathlib = pathlib.Path(os.path.abspath(path))
relative_path = path_as_pathlib.relative_to(pkg_root).with_suffix("")


@@ -4,7 +4,7 @@ from pathlib import Path
import rich
import typer
from gritql import run
from gritql import run # type: ignore
from typer import Option


@@ -13,7 +13,7 @@ def find_and_replace(source: str, replacements: Dict[str, str]) -> str:
return rtn
def replace_file(source: Path, replacements: Dict[str, str]) -> None:
def replace_file(source: Path, replacements: dict[str, str]) -> None:
try:
content = source.read_text()
except UnicodeDecodeError:
@@ -24,7 +24,7 @@ def replace_file(source: Path, replacements: Dict[str, str]) -> None:
source.write_text(new_content)
def replace_glob(parent: Path, glob: str, replacements: Dict[str, str]) -> None:
def replace_glob(parent: Path, glob: str, replacements: dict[str, str]) -> None:
for file in parent.glob(glob):
if not file.is_file():
continue
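A usage sketch for these helpers (a hypothetical call, mirroring how the integration command rewrites a freshly copied template):

from pathlib import Path

# Walk every file under the new package and substitute each placeholder string.
replace_glob(Path("langchain-parrot-link"), "**/*", {"__ModuleName__": "ParrotLink"})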

libs/cli/poetry.lock generated

File diff suppressed because it is too large.


@@ -25,16 +25,18 @@ langchain = "langchain_cli.cli:app"
langchain-cli = "langchain_cli.cli:app"
[tool.poetry.group.dev.dependencies]
poethepoet = "^0.24.1"
pytest = "^7.4.2"
pytest-watch = "^4.2.0"
[tool.poetry.group.lint.dependencies]
ruff = "^0.5"
mypy = "^1.13.0"
[tool.poetry.group.test.dependencies]
langchain = {path = "../langchain", develop = true}
[tool.poetry.group.typing.dependencies]
langchain = {path = "../langchain", develop = true}
[tool.poetry.group.test_integration.dependencies]
@@ -50,22 +52,11 @@ select = [
"T201", # print
]
[tool.poe.tasks]
test = "poetry run pytest tests"
watch = "poetry run ptw"
version = "poetry version --short"
bump = ["_bump_1", "_bump_2"]
lint = ["_lint", "_check_formatting"]
format = ["_format", "_lint_fix"]
_bump_2.shell = """sed -i "" "/^__version__ =/c\\ \n__version__ = \\"$version\\"\n" langchain_cli/cli.py"""
_bump_2.uses = { version = "version" }
_bump_1 = "poetry version patch"
_check_formatting = "poetry run ruff format . --diff"
_lint = "poetry run ruff check ."
_format = "poetry run ruff format ."
_lint_fix = "poetry run ruff check . --fix"
[tool.mypy]
exclude = [
"langchain_cli/integration_template",
"langchain_cli/package_template",
]
[build-system]
requires = ["poetry-core"]


@@ -1,3 +1,4 @@
# type: ignore
"""Script to generate migrations for the migration script."""
import json


@@ -0,0 +1,7 @@
import pytest
@pytest.mark.compile
def test_placeholder() -> None:
"""Used for compiling integration tests without running any real tests."""
pass


@@ -41,6 +41,7 @@ def find_issue(current: Folder, expected: Folder) -> str:
return "Unknown"
@pytest.mark.xfail(reason="grit may not be installed in env")
def test_command_line(tmp_path: Path) -> None:
runner = CliRunner()


@@ -11,6 +11,7 @@ modules = [
"vectorstores",
"embeddings",
"tools",
"retrievers",
]
for module in modules: