mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-08 14:31:55 +00:00
Pinecone: Add V4 support (#7473)
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
import importlib
|
||||
import os
|
||||
import time
|
||||
import uuid
|
||||
from typing import List
|
||||
|
||||
@@ -11,7 +12,6 @@ from langchain.embeddings import OpenAIEmbeddings
|
||||
from langchain.vectorstores.pinecone import Pinecone
|
||||
|
||||
index_name = "langchain-test-index" # name of the index
|
||||
namespace_name = "langchain-test-namespace" # name of the namespace
|
||||
dimension = 1536 # dimension of the embeddings
|
||||
|
||||
|
||||
@@ -39,40 +39,28 @@ class TestPinecone:
|
||||
cls.index = pinecone.Index(index_name)
|
||||
|
||||
if index_name in pinecone.list_indexes():
|
||||
index_stats = cls.index.describe_index_stats()
|
||||
if index_stats["dimension"] == dimension:
|
||||
# delete all the vectors in the index if the dimension is the same
|
||||
# from all namespaces
|
||||
index_stats = cls.index.describe_index_stats()
|
||||
for _namespace_name in index_stats["namespaces"].keys():
|
||||
cls.index.delete(delete_all=True, namespace=_namespace_name)
|
||||
pinecone.delete_index(index_name)
|
||||
|
||||
else:
|
||||
pinecone.delete_index(index_name)
|
||||
pinecone.create_index(name=index_name, dimension=dimension)
|
||||
else:
|
||||
pinecone.create_index(name=index_name, dimension=dimension)
|
||||
pinecone.create_index(name=index_name, dimension=dimension)
|
||||
|
||||
# ensure the index is empty
|
||||
index_stats = cls.index.describe_index_stats()
|
||||
assert index_stats["dimension"] == dimension
|
||||
if index_stats["namespaces"].get(namespace_name) is not None:
|
||||
assert index_stats["namespaces"][namespace_name]["vector_count"] == 0
|
||||
assert index_stats["total_vector_count"] == 0
|
||||
|
||||
@classmethod
|
||||
def teardown_class(cls) -> None:
|
||||
index_stats = cls.index.describe_index_stats()
|
||||
for _namespace_name in index_stats["namespaces"].keys():
|
||||
cls.index.delete(delete_all=True, namespace=_namespace_name)
|
||||
if index_name in pinecone.list_indexes():
|
||||
pinecone.delete_index(index_name)
|
||||
pinecone.create_index(index_name, dimension=dimension)
|
||||
|
||||
reset_pinecone()
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def setup(self) -> None:
|
||||
# delete all the vectors in the index
|
||||
index_stats = self.index.describe_index_stats()
|
||||
for _namespace_name in index_stats["namespaces"].keys():
|
||||
self.index.delete(delete_all=True, namespace=_namespace_name)
|
||||
if index_name in pinecone.list_indexes():
|
||||
pinecone.delete_index(index_name)
|
||||
pinecone.create_index(index_name, dimension=dimension)
|
||||
|
||||
reset_pinecone()
|
||||
|
||||
@@ -86,12 +74,11 @@ class TestPinecone:
|
||||
texts.insert(0, needs)
|
||||
|
||||
docsearch = Pinecone.from_texts(
|
||||
texts=texts,
|
||||
embedding=embedding_openai,
|
||||
index_name=index_name,
|
||||
namespace=namespace_name,
|
||||
texts=texts, embedding=embedding_openai, index_name=index_name
|
||||
)
|
||||
output = docsearch.similarity_search(unique_id, k=1, namespace=namespace_name)
|
||||
# wait for the index to be ready
|
||||
time.sleep(20)
|
||||
output = docsearch.similarity_search(unique_id, k=1)
|
||||
assert output == [Document(page_content=needs)]
|
||||
|
||||
@pytest.mark.vcr()
|
||||
@@ -110,9 +97,10 @@ class TestPinecone:
|
||||
embedding_openai,
|
||||
index_name=index_name,
|
||||
metadatas=metadatas,
|
||||
namespace=namespace_name,
|
||||
)
|
||||
output = docsearch.similarity_search(needs, k=1, namespace=namespace_name)
|
||||
# wait for the index to be ready
|
||||
time.sleep(20)
|
||||
output = docsearch.similarity_search(needs, k=1)
|
||||
|
||||
# TODO: why is the returned metadata {"page": 0.0} instead of {"page": 0}?
|
||||
assert output == [Document(page_content=needs, metadata={"page": 0.0})]
|
||||
@@ -127,11 +115,10 @@ class TestPinecone:
|
||||
embedding_openai,
|
||||
index_name=index_name,
|
||||
metadatas=metadatas,
|
||||
namespace=namespace_name,
|
||||
)
|
||||
output = docsearch.similarity_search_with_score(
|
||||
"foo", k=3, namespace=namespace_name
|
||||
)
|
||||
# wait for the index to be ready
|
||||
time.sleep(20)
|
||||
output = docsearch.similarity_search_with_score("foo", k=3)
|
||||
docs = [o[0] for o in output]
|
||||
scores = [o[1] for o in output]
|
||||
sorted_documents = sorted(docs, key=lambda x: x.metadata["page"])
|
||||
@@ -144,57 +131,17 @@ class TestPinecone:
|
||||
]
|
||||
assert scores[0] > scores[1] > scores[2]
|
||||
|
||||
def test_from_existing_index_with_namespaces(
|
||||
self, embedding_openai: OpenAIEmbeddings
|
||||
) -> None:
|
||||
"""Test that namespaces are properly handled."""
|
||||
# Populate two different namespaces within the same index
|
||||
texts_1 = ["foo", "bar", "baz"]
|
||||
metadatas = [{"page": i} for i in range(len(texts_1))]
|
||||
Pinecone.from_texts(
|
||||
texts_1,
|
||||
embedding_openai,
|
||||
index_name=index_name,
|
||||
metadatas=metadatas,
|
||||
namespace=f"{index_name}-1",
|
||||
)
|
||||
|
||||
texts_2 = ["foo2", "bar2", "baz2"]
|
||||
metadatas = [{"page": i} for i in range(len(texts_2))]
|
||||
|
||||
Pinecone.from_texts(
|
||||
texts_2,
|
||||
embedding_openai,
|
||||
index_name=index_name,
|
||||
metadatas=metadatas,
|
||||
namespace=f"{index_name}-2",
|
||||
)
|
||||
|
||||
# Search with namespace
|
||||
docsearch = Pinecone.from_existing_index(
|
||||
index_name=index_name,
|
||||
embedding=embedding_openai,
|
||||
namespace=f"{index_name}-1",
|
||||
)
|
||||
output = docsearch.similarity_search("foo", k=20, namespace=f"{index_name}-1")
|
||||
# check that we don't get results from the other namespace
|
||||
page_contents = sorted(set([o.page_content for o in output]))
|
||||
assert all(content in ["foo", "bar", "baz"] for content in page_contents)
|
||||
assert all(content not in ["foo2", "bar2", "baz2"] for content in page_contents)
|
||||
|
||||
def test_add_documents_with_ids(
|
||||
self, texts: List[str], embedding_openai: OpenAIEmbeddings
|
||||
) -> None:
|
||||
ids = [uuid.uuid4().hex for _ in range(len(texts))]
|
||||
Pinecone.from_texts(
|
||||
texts=texts,
|
||||
ids=ids,
|
||||
embedding=embedding_openai,
|
||||
index_name=index_name,
|
||||
namespace=index_name,
|
||||
texts=texts, ids=ids, embedding=embedding_openai, index_name=index_name
|
||||
)
|
||||
# wait for the index to be ready
|
||||
time.sleep(20)
|
||||
index_stats = self.index.describe_index_stats()
|
||||
assert index_stats["namespaces"][index_name]["vector_count"] == len(texts)
|
||||
assert index_stats["total_vector_count"] == len(texts)
|
||||
|
||||
ids_1 = [uuid.uuid4().hex for _ in range(len(texts))]
|
||||
Pinecone.from_texts(
|
||||
@@ -202,7 +149,8 @@ class TestPinecone:
|
||||
ids=ids_1,
|
||||
embedding=embedding_openai,
|
||||
index_name=index_name,
|
||||
namespace=index_name,
|
||||
)
|
||||
# wait for the index to be ready
|
||||
time.sleep(20)
|
||||
index_stats = self.index.describe_index_stats()
|
||||
assert index_stats["namespaces"][index_name]["vector_count"] == len(texts) * 2
|
||||
assert index_stats["total_vector_count"] == len(texts) * 2
|
||||
|
Reference in New Issue
Block a user