mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-28 09:28:48 +00:00
use namespace argument in Pinecone constructor (#1757)
Fix #1756. Use the `namespace` argument of `Pinecone.from_existing_index` to set the default value of `namespace` for other methods. This leads to more expected behavior and easier integration in chains. For the test, I've added a line to delete and rebuild the `langchain-demo` index at the beginning of the test. I'm not 100% sure it's a good idea, but it makes the test reproducible.
This commit is contained in:
parent
280cb4160d
commit
3701b2901e
@ -32,6 +32,7 @@ class Pinecone(VectorStore):
|
||||
index: Any,
|
||||
embedding_function: Callable,
|
||||
text_key: str,
|
||||
namespace: Optional[str] = None,
|
||||
):
|
||||
"""Initialize with Pinecone client."""
|
||||
try:
|
||||
@ -49,6 +50,7 @@ class Pinecone(VectorStore):
|
||||
self._index = index
|
||||
self._embedding_function = embedding_function
|
||||
self._text_key = text_key
|
||||
self._namespace = namespace
|
||||
|
||||
def add_texts(
|
||||
self,
|
||||
@ -71,6 +73,8 @@ class Pinecone(VectorStore):
|
||||
List of ids from adding the texts into the vectorstore.
|
||||
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = self._namespace
|
||||
# Embed and create the documents
|
||||
docs = []
|
||||
ids = ids or [str(uuid.uuid4()) for _ in texts]
|
||||
@ -101,6 +105,8 @@ class Pinecone(VectorStore):
|
||||
Returns:
|
||||
List of Documents most similar to the query and score for each
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = self._namespace
|
||||
query_obj = self._embedding_function(query)
|
||||
docs = []
|
||||
results = self._index.query(
|
||||
@ -135,6 +141,8 @@ class Pinecone(VectorStore):
|
||||
Returns:
|
||||
List of Documents most similar to the query and score for each
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = self._namespace
|
||||
query_obj = self._embedding_function(query)
|
||||
docs = []
|
||||
results = self._index.query(
|
||||
@ -222,7 +230,7 @@ class Pinecone(VectorStore):
|
||||
index = pinecone.Index(_index_name)
|
||||
# upsert to Pinecone
|
||||
index.upsert(vectors=list(to_upsert), namespace=namespace)
|
||||
return cls(index, embedding.embed_query, text_key)
|
||||
return cls(index, embedding.embed_query, text_key, namespace)
|
||||
|
||||
@classmethod
|
||||
def from_existing_index(
|
||||
@ -242,5 +250,5 @@ class Pinecone(VectorStore):
|
||||
)
|
||||
|
||||
return cls(
|
||||
pinecone.Index(index_name, namespace), embedding.embed_query, text_key
|
||||
pinecone.Index(index_name), embedding.embed_query, text_key, namespace
|
||||
)
|
||||
|
@ -7,6 +7,11 @@ from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
|
||||
|
||||
pinecone.init(api_key="YOUR_API_KEY", environment="YOUR_ENV")
|
||||
|
||||
# if the index already exists, delete it
|
||||
try:
|
||||
pinecone.delete_index("langchain-demo")
|
||||
except Exception:
|
||||
pass
|
||||
index = pinecone.Index("langchain-demo")
|
||||
|
||||
|
||||
@ -57,3 +62,36 @@ def test_pinecone_with_scores() -> None:
|
||||
Document(page_content="baz", metadata={"page": 2}),
|
||||
]
|
||||
assert scores[0] > scores[1] > scores[2]
|
||||
|
||||
|
||||
def test_pinecone_with_namespaces() -> None:
    """Test that namespaces are properly handled.

    Writes two disjoint sets of texts into the same ``langchain-demo`` index
    under two different namespaces, then opens the index through
    ``Pinecone.from_existing_index`` with one of those namespaces and checks
    that a similarity search only returns the texts stored in that namespace.
    """
    # Populate a single index with two namespaces holding disjoint texts
    texts = ["foo", "bar", "baz"]
    metadatas = [{"page": i} for i in range(len(texts))]
    Pinecone.from_texts(
        texts,
        FakeEmbeddings(),
        index_name="langchain-demo",
        metadatas=metadatas,
        namespace="test-namespace",
    )

    texts = ["foo2", "bar2", "baz2"]
    metadatas = [{"page": i} for i in range(len(texts))]
    Pinecone.from_texts(
        texts,
        FakeEmbeddings(),
        index_name="langchain-demo",
        metadatas=metadatas,
        namespace="test-namespace2",
    )

    # Search through a store bound to the first namespace only; no namespace
    # is passed to similarity_search, so the constructor-level default is
    # what is being exercised here.
    docsearch = Pinecone.from_existing_index(
        "langchain-demo", embedding=FakeEmbeddings(), namespace="test-namespace"
    )
    # k=6 asks for as many results as were inserted across both namespaces,
    # so texts leaking in from "test-namespace2" would show up in the output.
    output = docsearch.similarity_search("foo", k=6)

    # Check that we don't get results from the other namespace
    page_contents = [o.page_content for o in output]
    assert set(page_contents) == {"foo", "bar", "baz"}
|
||||
|
Loading…
Reference in New Issue
Block a user