diff --git a/langchain/vectorstores/pinecone.py b/langchain/vectorstores/pinecone.py index a577e9af4d8..2b5c5bf2ecc 100644 --- a/langchain/vectorstores/pinecone.py +++ b/langchain/vectorstores/pinecone.py @@ -40,7 +40,6 @@ class Pinecone(VectorStore): index: Any, embedding_function: Callable, text_key: str, - namespace: Optional[str] = None, ): """Initialize with Pinecone client.""" try: @@ -58,14 +57,12 @@ class Pinecone(VectorStore): self._index = index self._embedding_function = embedding_function self._text_key = text_key - self._namespace = namespace def add_texts( self, texts: Iterable[str], metadatas: Optional[List[dict]] = None, ids: Optional[List[str]] = None, - namespace: Optional[str] = None, batch_size: int = 32, **kwargs: Any, ) -> List[str]: @@ -75,14 +72,11 @@ class Pinecone(VectorStore): texts: Iterable of strings to add to the vectorstore. metadatas: Optional list of metadatas associated with the texts. ids: Optional list of ids to associate with the texts. - namespace: Optional pinecone namespace to add the texts to. Returns: List of ids from adding the texts into the vectorstore. """ - if namespace is None: - namespace = self._namespace # Embed and create the documents docs = [] ids = ids or [str(uuid.uuid4()) for _ in texts] @@ -92,7 +86,7 @@ class Pinecone(VectorStore): metadata[self._text_key] = text docs.append((ids[i], embedding, metadata)) # upsert to Pinecone - self._index.upsert(vectors=docs, namespace=namespace, batch_size=batch_size) + self._index.upsert(vectors=docs, batch_size=batch_size) return ids def similarity_search_with_score( @@ -100,7 +94,6 @@ class Pinecone(VectorStore): query: str, k: int = 4, filter: Optional[dict] = None, - namespace: Optional[str] = None, ) -> List[Tuple[Document, float]]: """Return pinecone documents most similar to query, along with scores. @@ -108,20 +101,16 @@ class Pinecone(VectorStore): query: Text to look up documents similar to. k: Number of Documents to return. Defaults to 4. filter: Dictionary of argument(s) to filter on metadata - namespace: Namespace to search in. Default will search in '' namespace. Returns: List of Documents most similar to the query and score for each """ - if namespace is None: - namespace = self._namespace query_obj = self._embedding_function(query) docs = [] results = self._index.query( [query_obj], top_k=k, include_metadata=True, - namespace=namespace, filter=filter, ) for res in results["matches"]: @@ -141,7 +130,6 @@ class Pinecone(VectorStore): query: str, k: int = 4, filter: Optional[dict] = None, - namespace: Optional[str] = None, **kwargs: Any, ) -> List[Document]: """Return pinecone documents most similar to query. @@ -150,13 +138,12 @@ class Pinecone(VectorStore): query: Text to look up documents similar to. k: Number of Documents to return. Defaults to 4. filter: Dictionary of argument(s) to filter on metadata - namespace: Namespace to search in. Default will search in '' namespace. Returns: List of Documents most similar to the query and score for each """ docs_and_scores = self.similarity_search_with_score( - query, k=k, filter=filter, namespace=namespace, **kwargs + query, k=k, filter=filter, **kwargs ) return [doc for doc, _ in docs_and_scores] @@ -176,7 +163,6 @@ class Pinecone(VectorStore): fetch_k: int = 20, lambda_mult: float = 0.5, filter: Optional[dict] = None, - namespace: Optional[str] = None, **kwargs: Any, ) -> List[Document]: """Return docs selected using the maximal marginal relevance. @@ -195,14 +181,11 @@ class Pinecone(VectorStore): Returns: List of Documents selected by maximal marginal relevance. """ - if namespace is None: - namespace = self._namespace results = self._index.query( [embedding], top_k=fetch_k, include_values=True, include_metadata=True, - namespace=namespace, filter=filter, ) mmr_selected = maximal_marginal_relevance( @@ -224,7 +207,6 @@ class Pinecone(VectorStore): fetch_k: int = 20, lambda_mult: float = 0.5, filter: Optional[dict] = None, - namespace: Optional[str] = None, **kwargs: Any, ) -> List[Document]: """Return docs selected using the maximal marginal relevance. @@ -245,7 +227,7 @@ class Pinecone(VectorStore): """ embedding = self._embedding_function(query) return self.max_marginal_relevance_search_by_vector( - embedding, k, fetch_k, lambda_mult, filter, namespace + embedding, k, fetch_k, lambda_mult, filter ) @classmethod @@ -258,7 +240,6 @@ class Pinecone(VectorStore): batch_size: int = 32, text_key: str = "text", index_name: Optional[str] = None, - namespace: Optional[str] = None, **kwargs: Any, ) -> Pinecone: """Construct Pinecone wrapper from raw documents. @@ -331,8 +312,8 @@ class Pinecone(VectorStore): to_upsert = zip(ids_batch, embeds, metadata) # upsert to Pinecone - index.upsert(vectors=list(to_upsert), namespace=namespace) - return cls(index, embedding.embed_query, text_key, namespace) + index.upsert(vectors=list(to_upsert)) + return cls(index, embedding.embed_query, text_key) @classmethod def from_existing_index( @@ -340,7 +321,6 @@ class Pinecone(VectorStore): index_name: str, embedding: Embeddings, text_key: str = "text", - namespace: Optional[str] = None, ) -> Pinecone: """Load pinecone vectorstore from index name.""" try: @@ -350,38 +330,21 @@ class Pinecone(VectorStore): "Could not import pinecone python package. " "Please install it with `pip install pinecone-client`." ) - - return cls( - pinecone.Index(index_name), embedding.embed_query, text_key, namespace - ) + return cls(pinecone.Index(index_name), embedding.embed_query, text_key) def delete( self, ids: Optional[List[str]] = None, - delete_all: Optional[bool] = None, - namespace: Optional[str] = None, - filter: Optional[dict] = None, **kwargs: Any, ) -> None: - """Delete by vector IDs or filter. + """Delete by vector IDs Args: ids: List of ids to delete. - filter: Dictionary of conditions to filter vectors to delete. """ + if ids is None: + raise ValueError("Ids must be provided.") - if namespace is None: - namespace = self._namespace - - if delete_all: - self._index.delete(delete_all=True, namespace=namespace, **kwargs) - elif ids is not None: - chunk_size = 1000 - for i in range(0, len(ids), chunk_size): - chunk = ids[i : i + chunk_size] - self._index.delete(ids=chunk, namespace=namespace, **kwargs) - elif filter is not None: - self._index.delete(filter=filter, namespace=namespace, **kwargs) - else: - raise ValueError("Either ids, delete_all, or filter must be provided.") - - return None + chunk_size = 1000 + for i in range(0, len(ids), chunk_size): + chunk = ids[i : i + chunk_size] + self._index.delete(ids=chunk, **kwargs) diff --git a/tests/integration_tests/vectorstores/test_pinecone.py b/tests/integration_tests/vectorstores/test_pinecone.py index 4a6a8fb1df2..d66ddf9a760 100644 --- a/tests/integration_tests/vectorstores/test_pinecone.py +++ b/tests/integration_tests/vectorstores/test_pinecone.py @@ -1,5 +1,6 @@ import importlib import os +import time import uuid from typing import List @@ -11,7 +12,6 @@ from langchain.embeddings import OpenAIEmbeddings from langchain.vectorstores.pinecone import Pinecone index_name = "langchain-test-index" # name of the index -namespace_name = "langchain-test-namespace" # name of the namespace dimension = 1536 # dimension of the embeddings @@ -39,40 +39,28 @@ class TestPinecone: cls.index = pinecone.Index(index_name) if index_name in pinecone.list_indexes(): - index_stats = cls.index.describe_index_stats() - if index_stats["dimension"] == dimension: - # delete all the vectors in the index if the dimension is the same - # from all namespaces - index_stats = cls.index.describe_index_stats() - for _namespace_name in index_stats["namespaces"].keys(): - cls.index.delete(delete_all=True, namespace=_namespace_name) + pinecone.delete_index(index_name) - else: - pinecone.delete_index(index_name) - pinecone.create_index(name=index_name, dimension=dimension) - else: - pinecone.create_index(name=index_name, dimension=dimension) + pinecone.create_index(name=index_name, dimension=dimension) # insure the index is empty index_stats = cls.index.describe_index_stats() assert index_stats["dimension"] == dimension - if index_stats["namespaces"].get(namespace_name) is not None: - assert index_stats["namespaces"][namespace_name]["vector_count"] == 0 + assert index_stats["total_vector_count"] == 0 @classmethod def teardown_class(cls) -> None: - index_stats = cls.index.describe_index_stats() - for _namespace_name in index_stats["namespaces"].keys(): - cls.index.delete(delete_all=True, namespace=_namespace_name) + if index_name in pinecone.list_indexes(): + pinecone.delete_index(index_name) + pinecone.create_index(index_name, dimension=dimension) reset_pinecone() @pytest.fixture(autouse=True) def setup(self) -> None: - # delete all the vectors in the index - index_stats = self.index.describe_index_stats() - for _namespace_name in index_stats["namespaces"].keys(): - self.index.delete(delete_all=True, namespace=_namespace_name) + if index_name in pinecone.list_indexes(): + pinecone.delete_index(index_name) + pinecone.create_index(index_name, dimension=dimension) reset_pinecone() @@ -86,12 +74,11 @@ class TestPinecone: texts.insert(0, needs) docsearch = Pinecone.from_texts( - texts=texts, - embedding=embedding_openai, - index_name=index_name, - namespace=namespace_name, + texts=texts, embedding=embedding_openai, index_name=index_name ) - output = docsearch.similarity_search(unique_id, k=1, namespace=namespace_name) + # wait for the index to be ready + time.sleep(20) + output = docsearch.similarity_search(unique_id, k=1) assert output == [Document(page_content=needs)] @pytest.mark.vcr() @@ -110,9 +97,10 @@ class TestPinecone: embedding_openai, index_name=index_name, metadatas=metadatas, - namespace=namespace_name, ) - output = docsearch.similarity_search(needs, k=1, namespace=namespace_name) + # wait for the index to be ready + time.sleep(20) + output = docsearch.similarity_search(needs, k=1) # TODO: why metadata={"page": 0.0}) instead of {"page": 0}? assert output == [Document(page_content=needs, metadata={"page": 0.0})] @@ -127,11 +115,10 @@ class TestPinecone: embedding_openai, index_name=index_name, metadatas=metadatas, - namespace=namespace_name, - ) - output = docsearch.similarity_search_with_score( - "foo", k=3, namespace=namespace_name ) + # wait for the index to be ready + time.sleep(20) + output = docsearch.similarity_search_with_score("foo", k=3) docs = [o[0] for o in output] scores = [o[1] for o in output] sorted_documents = sorted(docs, key=lambda x: x.metadata["page"]) @@ -144,57 +131,17 @@ class TestPinecone: ] assert scores[0] > scores[1] > scores[2] - def test_from_existing_index_with_namespaces( - self, embedding_openai: OpenAIEmbeddings - ) -> None: - """Test that namespaces are properly handled.""" - # Create two indexes with the same name but different namespaces - texts_1 = ["foo", "bar", "baz"] - metadatas = [{"page": i} for i in range(len(texts_1))] - Pinecone.from_texts( - texts_1, - embedding_openai, - index_name=index_name, - metadatas=metadatas, - namespace=f"{index_name}-1", - ) - - texts_2 = ["foo2", "bar2", "baz2"] - metadatas = [{"page": i} for i in range(len(texts_2))] - - Pinecone.from_texts( - texts_2, - embedding_openai, - index_name=index_name, - metadatas=metadatas, - namespace=f"{index_name}-2", - ) - - # Search with namespace - docsearch = Pinecone.from_existing_index( - index_name=index_name, - embedding=embedding_openai, - namespace=f"{index_name}-1", - ) - output = docsearch.similarity_search("foo", k=20, namespace=f"{index_name}-1") - # check that we don't get results from the other namespace - page_contents = sorted(set([o.page_content for o in output])) - assert all(content in ["foo", "bar", "baz"] for content in page_contents) - assert all(content not in ["foo2", "bar2", "baz2"] for content in page_contents) - def test_add_documents_with_ids( self, texts: List[str], embedding_openai: OpenAIEmbeddings ) -> None: ids = [uuid.uuid4().hex for _ in range(len(texts))] Pinecone.from_texts( - texts=texts, - ids=ids, - embedding=embedding_openai, - index_name=index_name, - namespace=index_name, + texts=texts, ids=ids, embedding=embedding_openai, index_name=index_name ) + # wait for the index to be ready + time.sleep(20) index_stats = self.index.describe_index_stats() - assert index_stats["namespaces"][index_name]["vector_count"] == len(texts) + assert index_stats["total_vector_count"] == len(texts) ids_1 = [uuid.uuid4().hex for _ in range(len(texts))] Pinecone.from_texts( @@ -202,7 +149,8 @@ class TestPinecone: ids=ids_1, embedding=embedding_openai, index_name=index_name, - namespace=index_name, ) + # wait for the index to be ready + time.sleep(20) index_stats = self.index.describe_index_stats() - assert index_stats["namespaces"][index_name]["vector_count"] == len(texts) * 2 + assert index_stats["total_vector_count"] == len(texts) * 2