mirror of
https://github.com/hwchase17/langchain.git
synced 2025-08-17 08:29:28 +00:00
optimize pgvector add_texts
(#7185)
- Description: At the moment, inserting new embeddings into pgvector queries all existing embeddings every time, because the defined `embeddings` relationship uses the default parameters, which set `lazy="select"`. This change drastically improves performance and adds a few additional cleanups: * remove `collection.embeddings.append`, as it was querying all embeddings on insert, and replace it with the `collection_id` param * centralize the storing logic in the `add_embeddings` function to reduce duplication * remove boilerplate - Issue: No issue was opened. - Dependencies: None. - Tag maintainer: this is a vectorstore update, so I think @rlancemartin, @eyurtsev - Twitter handle: @falmannaa
This commit is contained in:
parent
6711854e30
commit
79b59a8e06
@ -207,12 +207,6 @@ class PGVector(VectorStore):
|
|||||||
pre_delete_collection: bool = False,
|
pre_delete_collection: bool = False,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> PGVector:
|
) -> PGVector:
|
||||||
if ids is None:
|
|
||||||
ids = [str(uuid.uuid1()) for _ in texts]
|
|
||||||
|
|
||||||
if not metadatas:
|
|
||||||
metadatas = [{} for _ in texts]
|
|
||||||
|
|
||||||
connection_string = cls.get_connection_string(kwargs)
|
connection_string = cls.get_connection_string(kwargs)
|
||||||
|
|
||||||
store = cls(
|
store = cls(
|
||||||
@ -231,12 +225,12 @@ class PGVector(VectorStore):
|
|||||||
|
|
||||||
def add_embeddings(
|
def add_embeddings(
|
||||||
self,
|
self,
|
||||||
texts: List[str],
|
texts: Iterable[str],
|
||||||
embeddings: List[List[float]],
|
embeddings: List[List[float]],
|
||||||
metadatas: List[dict],
|
metadatas: Optional[List[dict]] = None,
|
||||||
ids: List[str],
|
ids: Optional[List[str]] = None,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> None:
|
) -> List[str]:
|
||||||
"""Add embeddings to the vectorstore.
|
"""Add embeddings to the vectorstore.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
@ -245,6 +239,12 @@ class PGVector(VectorStore):
|
|||||||
metadatas: List of metadatas associated with the texts.
|
metadatas: List of metadatas associated with the texts.
|
||||||
kwargs: vectorstore specific parameters
|
kwargs: vectorstore specific parameters
|
||||||
"""
|
"""
|
||||||
|
if ids is None:
|
||||||
|
ids = [str(uuid.uuid1()) for _ in texts]
|
||||||
|
|
||||||
|
if not metadatas:
|
||||||
|
metadatas = [{} for _ in texts]
|
||||||
|
|
||||||
with Session(self._conn) as session:
|
with Session(self._conn) as session:
|
||||||
collection = self.get_collection(session)
|
collection = self.get_collection(session)
|
||||||
if not collection:
|
if not collection:
|
||||||
@ -255,11 +255,13 @@ class PGVector(VectorStore):
|
|||||||
document=text,
|
document=text,
|
||||||
cmetadata=metadata,
|
cmetadata=metadata,
|
||||||
custom_id=id,
|
custom_id=id,
|
||||||
|
collection_id=collection.uuid,
|
||||||
)
|
)
|
||||||
collection.embeddings.append(embedding_store)
|
|
||||||
session.add(embedding_store)
|
session.add(embedding_store)
|
||||||
session.commit()
|
session.commit()
|
||||||
|
|
||||||
|
return ids
|
||||||
|
|
||||||
def add_texts(
|
def add_texts(
|
||||||
self,
|
self,
|
||||||
texts: Iterable[str],
|
texts: Iterable[str],
|
||||||
@ -277,30 +279,10 @@ class PGVector(VectorStore):
|
|||||||
Returns:
|
Returns:
|
||||||
List of ids from adding the texts into the vectorstore.
|
List of ids from adding the texts into the vectorstore.
|
||||||
"""
|
"""
|
||||||
if ids is None:
|
|
||||||
ids = [str(uuid.uuid1()) for _ in texts]
|
|
||||||
|
|
||||||
embeddings = self.embedding_function.embed_documents(list(texts))
|
embeddings = self.embedding_function.embed_documents(list(texts))
|
||||||
|
return self.add_embeddings(
|
||||||
if not metadatas:
|
texts=texts, embeddings=embeddings, metadatas=metadatas, ids=ids, **kwargs
|
||||||
metadatas = [{} for _ in texts]
|
)
|
||||||
|
|
||||||
with Session(self._conn) as session:
|
|
||||||
collection = self.get_collection(session)
|
|
||||||
if not collection:
|
|
||||||
raise ValueError("Collection not found")
|
|
||||||
for text, metadata, embedding, id in zip(texts, metadatas, embeddings, ids):
|
|
||||||
embedding_store = EmbeddingStore(
|
|
||||||
embedding=embedding,
|
|
||||||
document=text,
|
|
||||||
cmetadata=metadata,
|
|
||||||
custom_id=id,
|
|
||||||
)
|
|
||||||
collection.embeddings.append(embedding_store)
|
|
||||||
session.add(embedding_store)
|
|
||||||
session.commit()
|
|
||||||
|
|
||||||
return ids
|
|
||||||
|
|
||||||
def similarity_search(
|
def similarity_search(
|
||||||
self,
|
self,
|
||||||
|
Loading…
Reference in New Issue
Block a user