mirror of
https://github.com/hwchase17/langchain.git
synced 2025-04-27 11:41:51 +00:00
Title: langchain-pinecone: improve test structure and async handling Description: This PR improves the test infrastructure for the langchain-pinecone package by: 1. Implementing LangChain's standard test patterns for embeddings 2. Adding comprehensive configuration testing 3. Improving async test coverage 4. Fixing integration test issues with namespaces and async markers The changes make the tests more robust, maintainable, and aligned with LangChain's testing standards while ensuring proper async behavior in the embeddings implementation. Key improvements: - Added standard EmbeddingsTests implementation - Split custom configuration tests into a separate test class - Added proper async test coverage with pytest-asyncio - Fixed namespace handling in vector store integration tests - Improved test organization and documentation Dependencies: None (uses existing test dependencies) Tests and Documentation: - ✅ Added standard test implementation following LangChain's patterns - ✅ Added comprehensive unit tests for configuration and async behavior - ✅ All tests passing locally - No documentation changes needed (internal test improvements only) Twitter handle: N/A --------- Co-authored-by: Erick Friis <erick@langchain.dev>
85 lines
2.6 KiB
Python
85 lines
2.6 KiB
Python
import time
from typing import AsyncGenerator, Generator

import pytest
from langchain_core.documents import Document
from pinecone import Pinecone, ServerlessSpec  # type: ignore

from langchain_pinecone import PineconeEmbeddings, PineconeVectorStore
from tests.integration_tests.test_vectorstores import DEFAULT_SLEEP
|
|
|
|
# Vector length the tests expect from the embeddings; also used as the
# dimension of the index created by the `pc_index` fixture.
DIMENSION = 1024
# Name of the Pinecone index that `pc_index` creates and deletes.
INDEX_NAME = "langchain-pinecone-embeddings"
# Model name passed to `PineconeEmbeddings` in the `embd_client` fixture.
MODEL = "multilingual-e5-large"
# Namespace used for both writes and searches in `test_vector_store`.
NAMESPACE_NAME = "test_namespace"
|
|
|
|
|
|
@pytest.fixture(scope="function")
async def embd_client() -> AsyncGenerator[PineconeEmbeddings, None]:
    """Yield a ``PineconeEmbeddings`` instance built for ``MODEL``.

    After the requesting test finishes, the instance's ``async_client`` is
    closed.

    NOTE(review): this is an async generator declared with plain
    ``pytest.fixture``; presumably the project runs pytest-asyncio in a mode
    that collects such fixtures -- confirm against the pytest configuration.
    """
    embeddings = PineconeEmbeddings(model=MODEL)
    yield embeddings
    # Teardown: release the async client held by the embeddings object.
    await embeddings.async_client.close()
|
|
|
|
|
|
@pytest.fixture
def pc() -> Pinecone:
    """Return a ``Pinecone`` client constructed with no explicit arguments."""
    client = Pinecone()
    return client
|
|
|
|
|
|
@pytest.fixture()
def pc_index(pc: Pinecone) -> Generator[Pinecone.Index, None, None]:
    """Provide a handle to the test index, creating the index if it is missing.

    If no index named ``INDEX_NAME`` is listed, a serverless cosine index with
    ``DIMENSION`` dimensions is created. The fixture then polls until the
    index reports ready, yields ``pc.Index(INDEX_NAME)``, and deletes the
    index once the requesting test has finished.

    Args:
        pc: Pinecone client used for every index-management call.

    Yields:
        The object returned by ``pc.Index(INDEX_NAME)``.

    Raises:
        TimeoutError: If the index does not report ready before the polling
            deadline expires.
    """
    # Upper bound on the readiness wait so that a stuck index fails the run
    # with a clear error instead of hanging it indefinitely.
    ready_timeout_seconds = 300.0

    existing_names = [index["name"] for index in pc.list_indexes()]
    if INDEX_NAME not in existing_names:
        pc.create_index(
            name=INDEX_NAME,
            dimension=DIMENSION,
            metric="cosine",
            spec=ServerlessSpec(cloud="aws", region="us-east-1"),
        )

    # Poll whether or not the index was just created: an index that already
    # exists and is ready passes the first check without sleeping.
    deadline = time.monotonic() + ready_timeout_seconds
    while not pc.describe_index(INDEX_NAME).status["ready"]:
        if time.monotonic() >= deadline:
            raise TimeoutError(
                f"Pinecone index {INDEX_NAME!r} was not ready after "
                f"{ready_timeout_seconds:.0f} seconds"
            )
        time.sleep(1)

    yield pc.Index(INDEX_NAME)

    # Teardown: remove the index so it does not outlive the test.
    pc.delete_index(INDEX_NAME)
|
|
|
|
|
|
def test_embed_query(embd_client: PineconeEmbeddings) -> None:
    """Embed one query synchronously and check the vector's type and length."""
    vector = embd_client.embed_query("Hello, world!")
    assert isinstance(vector, list)
    assert len(vector) == DIMENSION
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_aembed_query(embd_client: PineconeEmbeddings) -> None:
    """Embed one query through the async API and check type and length."""
    vector = await embd_client.aembed_query("Hello, world!")
    assert isinstance(vector, list)
    assert len(vector) == DIMENSION
|
|
|
|
|
|
def test_embed_documents(embd_client: PineconeEmbeddings) -> None:
    """Embed two texts synchronously and check the result's shape.

    Expects a list with one entry per input text; the first entry must have
    ``DIMENSION`` elements.
    """
    texts = ["Hello, world!", "This is a test."]
    vectors = embd_client.embed_documents(texts)
    assert isinstance(vectors, list)
    assert len(vectors) == 2
    first_vector = vectors[0]
    assert len(first_vector) == DIMENSION
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_aembed_documents(embd_client: PineconeEmbeddings) -> None:
    """Embed two texts through the async API and check the result's shape.

    Expects a list with one entry per input text; the first entry must have
    ``DIMENSION`` elements.
    """
    texts = ["Hello, world!", "This is a test."]
    vectors = await embd_client.aembed_documents(texts)
    assert isinstance(vectors, list)
    assert len(vectors) == 2
    first_vector = vectors[0]
    assert len(first_vector) == DIMENSION
|
|
|
|
|
|
def test_vector_store(
    embd_client: PineconeEmbeddings, pc_index: Pinecone.Index
) -> None:
    """Add two documents to a namespace and check that a search returns both.

    ``pc_index`` is not referenced in the body; requesting it runs the
    fixture that sets up and later deletes the index named ``INDEX_NAME``.
    """
    documents = [Document("Hello, world!"), Document("This is a test.")]
    store = PineconeVectorStore(index_name=INDEX_NAME, embedding=embd_client)
    store.add_documents(documents, namespace=NAMESPACE_NAME)
    # Wait so that indexing is complete before the search is issued.
    time.sleep(DEFAULT_SLEEP)
    matches = store.similarity_search(query="hello", namespace=NAMESPACE_NAME)
    assert len(matches) == 2
|