From ecdfc98ef6b3a22e1269430380cad96636c848c0 Mon Sep 17 00:00:00 2001 From: ccurme Date: Thu, 5 Dec 2024 14:39:03 -0500 Subject: [PATCH] tests[patch]: run standard tests for embeddings and populate embeddings API ref (#28545) plus minor updates to chat models and vector store API refs --- .../integration_tests/chat_models.py | 31 +++---- .../integration_tests/embeddings.py | 67 +++++++++++++++ .../integration_tests/vectorstores.py | 4 + .../langchain_tests/unit_tests/chat_models.py | 34 ++++---- .../langchain_tests/unit_tests/embeddings.py | 85 ++++++++++++++++++- .../tests/unit_tests/test_embeddings.py | 26 ++++++ 6 files changed, 214 insertions(+), 33 deletions(-) create mode 100644 libs/standard-tests/tests/unit_tests/test_embeddings.py diff --git a/libs/standard-tests/langchain_tests/integration_tests/chat_models.py b/libs/standard-tests/langchain_tests/integration_tests/chat_models.py index 1cbec02ad0d..4e6a811c451 100644 --- a/libs/standard-tests/langchain_tests/integration_tests/chat_models.py +++ b/libs/standard-tests/langchain_tests/integration_tests/chat_models.py @@ -75,29 +75,30 @@ def _validate_tool_call_message_no_args(message: BaseMessage) -> None: class ChatModelIntegrationTests(ChatModelTests): """Base class for chat model integration tests. - Test subclasses must implement the following two properties: + Test subclasses must implement the ``chat_model_class`` and + ``chat_model_params`` properties to specify what model to test and its + initialization parameters. - chat_model_class - The chat model class to test, e.g., ``ChatParrotLink``. + Example: - Example: + .. code-block:: python - .. code-block:: python + from typing import Type + from langchain_tests.integration_tests import ChatModelIntegrationTests + from my_package.chat_models import MyChatModel + + + class TestMyChatModelIntegration(ChatModelIntegrationTests): @property - def chat_model_class(self) -> Type[ChatParrotLink]: - return ChatParrotLink - - chat_model_params - Initialization parameters for the chat model. - - Example: - - .. code-block:: python + def chat_model_class(self) -> Type[MyChatModel]: + # Return the chat model class to test here + return MyChatModel @property def chat_model_params(self) -> dict: - return {"model": "bird-brain-001", "temperature": 0} + # Return initialization parameters for the model. + return {"model": "model-001", "temperature": 0} .. note:: API references for individual test methods include troubleshooting tips. diff --git a/libs/standard-tests/langchain_tests/integration_tests/embeddings.py b/libs/standard-tests/langchain_tests/integration_tests/embeddings.py index 7e3689d0f54..8b4f20bb4a5 100644 --- a/libs/standard-tests/langchain_tests/integration_tests/embeddings.py +++ b/libs/standard-tests/langchain_tests/integration_tests/embeddings.py @@ -6,7 +6,47 @@ from langchain_tests.unit_tests.embeddings import EmbeddingsTests class EmbeddingsIntegrationTests(EmbeddingsTests): + """Base class for embeddings integration tests. + + Test subclasses must implement the ``embeddings_class`` property to specify the + embeddings model to be tested. You can also override the + ``embedding_model_params`` property to specify initialization parameters. + + Example: + + .. code-block:: python + + from typing import Type + + from langchain_tests.integration_tests import EmbeddingsIntegrationTests + from my_package.embeddings import MyEmbeddingsModel + + + class TestMyEmbeddingsModelIntegration(EmbeddingsIntegrationTests): + @property + def embeddings_class(self) -> Type[MyEmbeddingsModel]: + # Return the embeddings model class to test here + return MyEmbeddingsModel + + @property + def embedding_model_params(self) -> dict: + # Return initialization parameters for the model. + return {"model": "model-001"} + + .. note:: + API references for individual test methods include troubleshooting tips. + """ + def test_embed_query(self, model: Embeddings) -> None: + """Test embedding a string query. + + .. dropdown:: Troubleshooting + + If this test fails, check that: + + 1. The model will generate a list of floats when calling ``.embed_query`` on a string. + 2. The length of the list is consistent across different inputs. + """ # noqa: E501 embedding_1 = model.embed_query("foo") assert isinstance(embedding_1, List) @@ -18,6 +58,15 @@ class EmbeddingsIntegrationTests(EmbeddingsTests): assert len(embedding_1) == len(embedding_2) def test_embed_documents(self, model: Embeddings) -> None: + """Test embedding a list of strings. + + .. dropdown:: Troubleshooting + + If this test fails, check that: + + 1. The model will generate a list of lists of floats when calling ``.embed_documents`` on a list of strings. + 2. The length of each list is the same. + """ # noqa: E501 documents = ["foo", "bar", "baz"] embeddings = model.embed_documents(documents) @@ -28,6 +77,15 @@ class EmbeddingsIntegrationTests(EmbeddingsTests): assert all(len(embedding) == len(embeddings[0]) for embedding in embeddings) async def test_aembed_query(self, model: Embeddings) -> None: + """Test embedding a string query async. + + .. dropdown:: Troubleshooting + + If this test fails, check that: + + 1. The model will generate a list of floats when calling ``.aembed_query`` on a string. + 2. The length of the list is consistent across different inputs. + """ # noqa: E501 embedding_1 = await model.aembed_query("foo") assert isinstance(embedding_1, List) @@ -39,6 +97,15 @@ class EmbeddingsIntegrationTests(EmbeddingsTests): assert len(embedding_1) == len(embedding_2) async def test_aembed_documents(self, model: Embeddings) -> None: + """Test embedding a list of strings async. + + .. dropdown:: Troubleshooting + + If this test fails, check that: + + 1. The model will generate a list of lists of floats when calling ``.aembed_documents`` on a list of strings. + 2. The length of each list is the same. + """ # noqa: E501 documents = ["foo", "bar", "baz"] embeddings = await model.aembed_documents(documents) diff --git a/libs/standard-tests/langchain_tests/integration_tests/vectorstores.py b/libs/standard-tests/langchain_tests/integration_tests/vectorstores.py index 07d5ced7202..36770823be5 100644 --- a/libs/standard-tests/langchain_tests/integration_tests/vectorstores.py +++ b/libs/standard-tests/langchain_tests/integration_tests/vectorstores.py @@ -76,6 +76,8 @@ class ReadWriteTestSuite(BaseStandardTests): store.delete_collection() pass + .. note:: + API references for individual test methods include troubleshooting tips. """ # noqa: E501 @abstractmethod @@ -445,6 +447,8 @@ class AsyncReadWriteTestSuite(BaseStandardTests): store.delete_collection() pass + .. note:: + API references for individual test methods include troubleshooting tips. """ # noqa: E501 @abstractmethod diff --git a/libs/standard-tests/langchain_tests/unit_tests/chat_models.py b/libs/standard-tests/langchain_tests/unit_tests/chat_models.py index 2de0776f5e3..71f3dd2169c 100644 --- a/libs/standard-tests/langchain_tests/unit_tests/chat_models.py +++ b/libs/standard-tests/langchain_tests/unit_tests/chat_models.py @@ -380,29 +380,30 @@ class ChatModelTests(BaseStandardTests): class ChatModelUnitTests(ChatModelTests): """Base class for chat model unit tests. - Test subclasses must implement the following two properties: + Test subclasses must implement the ``chat_model_class`` and + ``chat_model_params`` properties to specify what model to test and its + initialization parameters. - chat_model_class - The chat model class to test, e.g., ``ChatParrotLink``. + Example: - Example: + .. code-block:: python - .. code-block:: python + from typing import Type + from langchain_tests.unit_tests import ChatModelUnitTests + from my_package.chat_models import MyChatModel + + + class TestMyChatModelUnit(ChatModelUnitTests): @property - def chat_model_class(self) -> Type[ChatParrotLink]: - return ChatParrotLink - - chat_model_params - Initialization parameters for the chat model. - - Example: - - .. code-block:: python + def chat_model_class(self) -> Type[MyChatModel]: + # Return the chat model class to test here + return MyChatModel @property def chat_model_params(self) -> dict: - return {"model": "bird-brain-001", "temperature": 0} + # Return initialization parameters for the model. + return {"model": "model-001", "temperature": 0} .. note:: API references for individual test methods include troubleshooting tips. @@ -486,7 +487,8 @@ class ChatModelUnitTests(ChatModelTests): .. dropdown:: Troubleshooting If this test fails, ensure that ``init_from_env_params`` is specified - correctly. + correctly and that model parameters are properly set from environment + variables during initialization. """ env_params, model_params, expected_attrs = self.init_from_env_params if not env_params: diff --git a/libs/standard-tests/langchain_tests/unit_tests/embeddings.py b/libs/standard-tests/langchain_tests/unit_tests/embeddings.py index da7b7851384..c580243b16d 100644 --- a/libs/standard-tests/langchain_tests/unit_tests/embeddings.py +++ b/libs/standard-tests/langchain_tests/unit_tests/embeddings.py @@ -25,17 +25,98 @@ class EmbeddingsTests(BaseStandardTests): class EmbeddingsUnitTests(EmbeddingsTests): + """Base class for embeddings unit tests. + + Test subclasses must implement the ``embeddings_class`` property to specify the + embeddings model to be tested. You can also override the + ``embedding_model_params`` property to specify initialization parameters. + + Example: + + .. code-block:: python + + from typing import Type + + from langchain_tests.unit_tests import EmbeddingsUnitTests + from my_package.embeddings import MyEmbeddingsModel + + + class TestMyEmbeddingsModelUnit(EmbeddingsUnitTests): + @property + def embeddings_class(self) -> Type[MyEmbeddingsModel]: + # Return the embeddings model class to test here + return MyEmbeddingsModel + + @property + def embedding_model_params(self) -> dict: + # Return initialization parameters for the model. + return {"model": "model-001"} + + .. note:: + API references for individual test methods include troubleshooting tips. + + Testing initialization from environment variables + Overriding the ``init_from_env_params`` property will enable additional tests + for initialization from environment variables. See below for details. + + .. dropdown:: init_from_env_params + + This property is used in unit tests to test initialization from + environment variables. It should return a tuple of three dictionaries + that specify the environment variables, additional initialization args, + and expected instance attributes to check. + + Defaults to empty dicts. If not overridden, the test is skipped. + + Example: + + .. code-block:: python + + @property + def init_from_env_params(self) -> Tuple[dict, dict, dict]: + return ( + { + "MY_API_KEY": "api_key", + }, + { + "model": "model-001", + }, + { + "my_api_key": "api_key", + }, + ) + """ # noqa: E501 + def test_init(self) -> None: + """Test model initialization. + + .. dropdown:: Troubleshooting + + If this test fails, ensure that ``embedding_model_params`` is specified + and the model can be initialized from those params. + """ model = self.embeddings_class(**self.embedding_model_params) assert model is not None @property def init_from_env_params(self) -> Tuple[dict, dict, dict]: - """Return env vars, init args, and expected instance attrs for initializing - from env vars.""" + """This property is used in unit tests to test initialization from environment + variables. It should return a tuple of three dictionaries that specify the + environment variables, additional initialization args, and expected instance + attributes to check.""" return {}, {}, {} def test_init_from_env(self) -> None: + """Test initialization from environment variables. Relies on the + ``init_from_env_params`` property. Test is skipped if that property is not + set. + + .. dropdown:: Troubleshooting + + If this test fails, ensure that ``init_from_env_params`` is specified + correctly and that model parameters are properly set from environment + variables during initialization. + """ env_params, embeddings_params, expected_attrs = self.init_from_env_params if env_params: with mock.patch.dict(os.environ, env_params): diff --git a/libs/standard-tests/tests/unit_tests/test_embeddings.py b/libs/standard-tests/tests/unit_tests/test_embeddings.py new file mode 100644 index 00000000000..d2a551decca --- /dev/null +++ b/libs/standard-tests/tests/unit_tests/test_embeddings.py @@ -0,0 +1,26 @@ +from typing import Type + +from langchain_core.embeddings import DeterministicFakeEmbedding, Embeddings + +from langchain_tests.integration_tests import EmbeddingsIntegrationTests +from langchain_tests.unit_tests import EmbeddingsUnitTests + + +class TestFakeEmbeddingsUnit(EmbeddingsUnitTests): + @property + def embeddings_class(self) -> Type[Embeddings]: + return DeterministicFakeEmbedding + + @property + def embedding_model_params(self) -> dict: + return {"size": 6} # embedding dimension + + +class TestFakeEmbeddingsIntegration(EmbeddingsIntegrationTests): + @property + def embeddings_class(self) -> Type[Embeddings]: + return DeterministicFakeEmbedding + + @property + def embedding_model_params(self) -> dict: + return {"size": 6}