mirror of
https://github.com/hwchase17/langchain.git
synced 2025-04-28 11:55:21 +00:00
pinecone: Review pinecone tests (#29073)
Title: langchain-pinecone: improve test structure and async handling

Description: This PR improves the test infrastructure for the langchain-pinecone package by:
1. Implementing LangChain's standard test patterns for embeddings
2. Adding comprehensive configuration testing
3. Improving async test coverage
4. Fixing integration test issues with namespaces and async markers

The changes make the tests more robust, maintainable, and aligned with LangChain's testing standards while ensuring proper async behavior in the embeddings implementation.

Key improvements:
- Added standard EmbeddingsTests implementation
- Split custom configuration tests into a separate test class
- Added proper async test coverage with pytest-asyncio
- Fixed namespace handling in vector store integration tests
- Improved test organization and documentation

Dependencies: None (uses existing test dependencies)

Tests and Documentation:
- ✅ Added standard test implementation following LangChain's patterns
- ✅ Added comprehensive unit tests for configuration and async behavior
- ✅ All tests passing locally
- No documentation changes needed (internal test improvements only)

Twitter handle: N/A

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
This commit is contained in:
parent
d9c51b71c4
commit
ce9e9f9314
@ -106,7 +106,6 @@ class PineconeEmbeddings(BaseModel, Embeddings):
|
||||
self._client = client
|
||||
|
||||
# Ensure async_client is lazily initialized
|
||||
_ = self.async_client
|
||||
return self
|
||||
|
||||
def _get_batch_iterator(self, texts: List[str]) -> Iterable:
|
||||
|
955
libs/partners/pinecone/poetry.lock
generated
955
libs/partners/pinecone/poetry.lock
generated
File diff suppressed because it is too large
Load Diff
@ -1,5 +1,5 @@
|
||||
[build-system]
|
||||
requires = [ "poetry-core>=1.0.0",]
|
||||
requires = ["poetry-core>=1.0.0"]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
|
||||
[tool.poetry]
|
||||
@ -24,17 +24,22 @@ langchain-core = "^0.3.21"
|
||||
pinecone = "^5.4.0"
|
||||
aiohttp = ">=3.9.5,<3.10"
|
||||
numpy = ">=1.26.0,<2.0.0"
|
||||
langchain-tests = "^0.3.7"
|
||||
|
||||
[tool.ruff.lint]
|
||||
select = [ "E", "F", "I", "T201",]
|
||||
select = ["E", "F", "I", "T201"]
|
||||
|
||||
[tool.coverage.run]
|
||||
omit = [ "tests/*",]
|
||||
omit = ["tests/*"]
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
addopts = "--snapshot-warn-unused --strict-markers --strict-config --durations=5"
|
||||
markers = [ "requires: mark tests as requiring a specific library", "compile: mark placeholder test used to compile integration tests without running them",]
|
||||
markers = [
|
||||
"requires: mark tests as requiring a specific library",
|
||||
"compile: mark placeholder test used to compile integration tests without running them",
|
||||
]
|
||||
asyncio_mode = "auto"
|
||||
asyncio_default_fixture_loop_scope = "function"
|
||||
|
||||
[tool.poetry.group.test]
|
||||
optional = true
|
||||
@ -52,12 +57,12 @@ optional = true
|
||||
optional = true
|
||||
|
||||
[tool.poetry.group.test.dependencies]
|
||||
pytest = "^7.3.0"
|
||||
pytest = "^8"
|
||||
freezegun = "^1.2.2"
|
||||
pytest-mock = "^3.10.0"
|
||||
syrupy = "^4.0.2"
|
||||
pytest-watcher = "^0.3.4"
|
||||
pytest-asyncio = "^0.21.1"
|
||||
pytest-asyncio = ">=0.25.0,<1"
|
||||
|
||||
[tool.poetry.group.codespell.dependencies]
|
||||
codespell = "^2.2.0"
|
||||
|
@ -1,19 +1,24 @@
|
||||
import time
|
||||
from typing import AsyncGenerator
|
||||
|
||||
import pytest
|
||||
from langchain_core.documents import Document
|
||||
from pinecone import Pinecone, ServerlessSpec # type: ignore
|
||||
|
||||
from langchain_pinecone import PineconeEmbeddings, PineconeVectorStore
|
||||
from tests.integration_tests.test_vectorstores import DEFAULT_SLEEP
|
||||
|
||||
DIMENSION = 1024
|
||||
INDEX_NAME = "langchain-pinecone-embeddings"
|
||||
MODEL = "multilingual-e5-large"
|
||||
NAMESPACE_NAME = "test_namespace"
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def embd_client() -> PineconeEmbeddings:
|
||||
return PineconeEmbeddings(model=MODEL)
|
||||
@pytest.fixture(scope="function")
|
||||
async def embd_client() -> AsyncGenerator[PineconeEmbeddings, None]:
|
||||
client = PineconeEmbeddings(model=MODEL)
|
||||
yield client
|
||||
await client.async_client.close()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
@ -44,6 +49,7 @@ def test_embed_query(embd_client: PineconeEmbeddings) -> None:
|
||||
assert len(out) == DIMENSION
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_aembed_query(embd_client: PineconeEmbeddings) -> None:
|
||||
out = await embd_client.aembed_query("Hello, world!")
|
||||
assert isinstance(out, list)
|
||||
@ -57,6 +63,7 @@ def test_embed_documents(embd_client: PineconeEmbeddings) -> None:
|
||||
assert len(out[0]) == DIMENSION
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_aembed_documents(embd_client: PineconeEmbeddings) -> None:
|
||||
out = await embd_client.aembed_documents(["Hello, world!", "This is a test."])
|
||||
assert isinstance(out, list)
|
||||
@ -68,7 +75,10 @@ def test_vector_store(
|
||||
embd_client: PineconeEmbeddings, pc_index: Pinecone.Index
|
||||
) -> None:
|
||||
vectorstore = PineconeVectorStore(index_name=INDEX_NAME, embedding=embd_client)
|
||||
vectorstore.add_documents([Document("Hello, world!"), Document("This is a test.")])
|
||||
time.sleep(5)
|
||||
resp = vectorstore.similarity_search(query="hello")
|
||||
vectorstore.add_documents(
|
||||
[Document("Hello, world!"), Document("This is a test.")],
|
||||
namespace=NAMESPACE_NAME,
|
||||
)
|
||||
time.sleep(DEFAULT_SLEEP) # Increase wait time to ensure indexing is complete
|
||||
resp = vectorstore.similarity_search(query="hello", namespace=NAMESPACE_NAME)
|
||||
assert len(resp) == 2
|
||||
|
@ -8,7 +8,8 @@ import pinecone # type: ignore
|
||||
import pytest # type: ignore[import-not-found]
|
||||
from langchain_core.documents import Document
|
||||
from langchain_openai import OpenAIEmbeddings # type: ignore[import-not-found]
|
||||
from pinecone import PodSpec
|
||||
from langchain_tests.integration_tests.vectorstores import VectorStoreIntegrationTests
|
||||
from pinecone import ServerlessSpec
|
||||
from pytest_mock import MockerFixture # type: ignore[import-not-found]
|
||||
|
||||
from langchain_pinecone import PineconeVectorStore
|
||||
@ -20,49 +21,43 @@ DIMENSION = 1536 # dimension of the embeddings
|
||||
DEFAULT_SLEEP = 20
|
||||
|
||||
|
||||
class TestPinecone:
|
||||
class TestPinecone(VectorStoreIntegrationTests):
|
||||
index: "pinecone.Index"
|
||||
pc: "pinecone.Pinecone"
|
||||
|
||||
@classmethod
|
||||
def setup_class(cls) -> None:
|
||||
def setup_class(self) -> None:
|
||||
import pinecone
|
||||
|
||||
client = pinecone.Pinecone(api_key=os.environ["PINECONE_API_KEY"])
|
||||
index_list = client.list_indexes()
|
||||
for i in index_list:
|
||||
if i["name"] == INDEX_NAME:
|
||||
if INDEX_NAME in [
|
||||
i["name"] for i in index_list
|
||||
]: # change to list comprehension
|
||||
client.delete_index(INDEX_NAME)
|
||||
break
|
||||
if len(index_list) > 0:
|
||||
time.sleep(DEFAULT_SLEEP) # prevent race with creation
|
||||
time.sleep(DEFAULT_SLEEP) # prevent race with subsequent creation
|
||||
client.create_index(
|
||||
name=INDEX_NAME,
|
||||
dimension=DIMENSION,
|
||||
metric="cosine",
|
||||
spec=PodSpec(environment="gcp-starter"),
|
||||
spec=ServerlessSpec(cloud="aws", region="us-west-2"),
|
||||
)
|
||||
|
||||
cls.index = client.Index(INDEX_NAME)
|
||||
|
||||
# ensure the index is empty
|
||||
index_stats = cls.index.describe_index_stats()
|
||||
assert index_stats["dimension"] == DIMENSION
|
||||
if index_stats["namespaces"].get(NAMESPACE_NAME) is not None:
|
||||
assert index_stats["namespaces"][NAMESPACE_NAME]["vector_count"] == 0
|
||||
self.index = client.Index(INDEX_NAME)
|
||||
self.pc = client
|
||||
|
||||
@classmethod
|
||||
def teardown_class(cls) -> None:
|
||||
index_stats = cls.index.describe_index_stats()
|
||||
for _namespace_name in index_stats["namespaces"].keys():
|
||||
cls.index.delete(delete_all=True, namespace=_namespace_name)
|
||||
def teardown_class(self) -> None:
|
||||
self.pc.delete_index()
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def setup(self) -> None:
|
||||
# delete all the vectors in the index
|
||||
print("called") # noqa: T201
|
||||
index_stats = self.index.describe_index_stats()
|
||||
if index_stats["total_vector_count"] > 0:
|
||||
try:
|
||||
self.index.delete(delete_all=True, namespace=NAMESPACE_NAME)
|
||||
time.sleep(DEFAULT_SLEEP) # prevent race condition with previous step
|
||||
except Exception:
|
||||
# if namespace not found
|
||||
pass
|
||||
|
@ -1,4 +1,10 @@
|
||||
from typing import Any, Type
|
||||
from unittest.mock import patch
|
||||
|
||||
import aiohttp
|
||||
import pytest
|
||||
from langchain_core.utils import convert_to_secret_str
|
||||
from langchain_tests.unit_tests.embeddings import EmbeddingsTests
|
||||
|
||||
from langchain_pinecone import PineconeEmbeddings
|
||||
|
||||
@ -6,23 +12,72 @@ API_KEY = convert_to_secret_str("NOT_A_VALID_KEY")
|
||||
MODEL_NAME = "multilingual-e5-large"
|
||||
|
||||
|
||||
def test_default_config() -> None:
|
||||
e = PineconeEmbeddings(
|
||||
pinecone_api_key=API_KEY, # type: ignore[call-arg]
|
||||
model=MODEL_NAME,
|
||||
)
|
||||
assert e.batch_size == 96
|
||||
|
||||
|
||||
def test_default_config_with_api_key() -> None:
|
||||
e = PineconeEmbeddings(api_key=API_KEY, model=MODEL_NAME)
|
||||
assert e.batch_size == 96
|
||||
|
||||
|
||||
def test_custom_config() -> None:
|
||||
e = PineconeEmbeddings(
|
||||
pinecone_api_key=API_KEY, # type: ignore[call-arg]
|
||||
@pytest.fixture(autouse=True)
|
||||
def mock_pinecone() -> Any:
|
||||
"""Mock Pinecone client for all tests."""
|
||||
with patch("langchain_pinecone.embeddings.PineconeClient") as mock:
|
||||
yield mock
|
||||
|
||||
|
||||
class TestPineconeEmbeddingsStandard(EmbeddingsTests):
|
||||
"""Standard LangChain embeddings tests."""
|
||||
|
||||
@property
|
||||
def embeddings_class(self) -> Type[PineconeEmbeddings]:
|
||||
"""Get the class under test."""
|
||||
return PineconeEmbeddings
|
||||
|
||||
@property
|
||||
def embedding_model_params(self) -> dict:
|
||||
"""Get the parameters for initializing the embeddings model."""
|
||||
return {
|
||||
"model": MODEL_NAME,
|
||||
"pinecone_api_key": API_KEY,
|
||||
}
|
||||
|
||||
|
||||
class TestPineconeEmbeddingsConfig:
|
||||
"""Additional configuration tests for PineconeEmbeddings."""
|
||||
|
||||
def test_default_config(self) -> None:
|
||||
"""Test default configuration is set correctly."""
|
||||
embeddings = PineconeEmbeddings(model=MODEL_NAME, pinecone_api_key=API_KEY) # type: ignore
|
||||
assert embeddings.batch_size == 96
|
||||
assert embeddings.query_params == {"input_type": "query", "truncation": "END"}
|
||||
assert embeddings.document_params == {
|
||||
"input_type": "passage",
|
||||
"truncation": "END",
|
||||
}
|
||||
assert embeddings.dimension == 1024
|
||||
|
||||
def test_custom_config(self) -> None:
|
||||
"""Test custom configuration overrides defaults."""
|
||||
embeddings = PineconeEmbeddings(
|
||||
model=MODEL_NAME,
|
||||
api_key=API_KEY,
|
||||
batch_size=128,
|
||||
query_params={"custom": "param"},
|
||||
document_params={"other": "param"},
|
||||
)
|
||||
assert e.batch_size == 128
|
||||
assert embeddings.batch_size == 128
|
||||
assert embeddings.query_params == {"custom": "param"}
|
||||
assert embeddings.document_params == {"other": "param"}
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_async_client_initialization(self) -> None:
|
||||
"""Test async client is initialized correctly and only when needed."""
|
||||
embeddings = PineconeEmbeddings(model=MODEL_NAME, api_key=API_KEY)
|
||||
assert embeddings._async_client is None
|
||||
|
||||
# Access async_client property
|
||||
client = embeddings.async_client
|
||||
assert client is not None
|
||||
assert isinstance(client, aiohttp.ClientSession)
|
||||
|
||||
# Ensure headers are set correctly
|
||||
expected_headers = {
|
||||
"Api-Key": API_KEY.get_secret_value(),
|
||||
"Content-Type": "application/json",
|
||||
"X-Pinecone-API-Version": "2024-10",
|
||||
}
|
||||
assert client._default_headers == expected_headers
|
||||
|
@ -1,6 +1,6 @@
|
||||
from unittest.mock import Mock
|
||||
|
||||
from langchain_pinecone.vectorstores import Pinecone, PineconeVectorStore
|
||||
from langchain_pinecone.vectorstores import PineconeVectorStore
|
||||
|
||||
|
||||
def test_initialization() -> None:
|
||||
@ -9,7 +9,7 @@ def test_initialization() -> None:
|
||||
index = Mock()
|
||||
embedding = Mock()
|
||||
text_key = "xyz"
|
||||
Pinecone(index, embedding, text_key)
|
||||
PineconeVectorStore(index, embedding, text_key)
|
||||
|
||||
|
||||
def test_id_prefix() -> None:
|
||||
|
Loading…
Reference in New Issue
Block a user