From 17e42bbd18827a0163022b3e2f3120c09d9bc486 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Wed, 8 May 2024 23:21:30 +0200 Subject: [PATCH] community[patch]: pgvector: Slight refactoring to make code a bit more reusable (#16243) - **Description:** Improve the [pgvector vector store adapter](https://github.com/langchain-ai/langchain/blob/v0.1.1/libs/community/langchain_community/vectorstores/pgvector.py) to make it reusable by adapters deriving from it. - **Issue:** NA - **Dependencies:** NA - **References:** https://github.com/crate-workbench/langchain/pull/1 - **Addressed to:** @eyurtsev, @cbornet Hi from the CrateDB team. First of all, thanks a stack for conceiving and maintaining LangChain. We are currently [preparing a patch](https://github.com/crate-workbench/langchain/pull/1) for adding [CrateDB](https://github.com/crate/crate) to the list of community adapters. Because CrateDB aims to be compatible with PostgreSQL to some degree, its vector store adapter for LangChain derives functionality from the corresponding implementation for pgvector. Therefore, in order to make the implementation more reusable, we needed to rename the private methods `__from` and `__query_collection` to the less private counterparts `_from` and `_query_collection`, so that subclasses can override them, in order to unlock other adapters deriving from [pgvector](https://github.com/langchain-ai/langchain/blob/v0.1.1/libs/community/langchain_community/vectorstores/pgvector.py). With kind regards, Andreas. 
--- .../langchain_community/vectorstores/pgvector.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/libs/community/langchain_community/vectorstores/pgvector.py b/libs/community/langchain_community/vectorstores/pgvector.py index af0f9ad32ff..7a8a49bc48d 100644 --- a/libs/community/langchain_community/vectorstores/pgvector.py +++ b/libs/community/langchain_community/vectorstores/pgvector.py @@ -455,7 +455,7 @@ class PGVector(VectorStore): return self.CollectionStore.get_by_name(session, self.collection_name) @classmethod - def __from( + def _from( cls, texts: List[str], embeddings: List[List[float]], @@ -623,7 +623,7 @@ class PGVector(VectorStore): k: int = 4, filter: Optional[dict] = None, ) -> List[Tuple[Document, float]]: - results = self.__query_collection(embedding=embedding, k=k, filter=filter) + results = self._query_collection(embedding=embedding, k=k, filter=filter) return self._results_to_docs_and_scores(results) @@ -922,7 +922,7 @@ class PGVector(VectorStore): f"Invalid type: Expected a dictionary but got type: {type(filters)}" ) - def __query_collection( + def _query_collection( self, embedding: List[float], k: int = 4, @@ -1008,7 +1008,7 @@ class PGVector(VectorStore): """ embeddings = embedding.embed_documents(list(texts)) - return cls.__from( + return cls._from( texts, embeddings, embedding, @@ -1054,7 +1054,7 @@ class PGVector(VectorStore): texts = [t[0] for t in text_embeddings] embeddings = [t[1] for t in text_embeddings] - return cls.__from( + return cls._from( texts, embeddings, embedding, @@ -1218,7 +1218,7 @@ class PGVector(VectorStore): List[Tuple[Document, float]]: List of Documents selected by maximal marginal relevance to the query and score for each. """ - results = self.__query_collection(embedding=embedding, k=fetch_k, filter=filter) + results = self._query_collection(embedding=embedding, k=fetch_k, filter=filter) embedding_list = [result.EmbeddingStore.embedding for result in results]