mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-30 02:13:23 +00:00
Fix SupabaseVectorStore write operation timeout (#12318)
**Description** This small change will make chunk_size a configurable parameter for loading documents into a Supabase database. **Issue** https://github.com/langchain-ai/langchain/issues/11422 **Dependencies** No changes **Twitter** @ j1philli **Reminder** If no one reviews your PR within a few days, please @-mention one of @baskaryan, @eyurtsev, @hwchase17. --------- Co-authored-by: Greg Richardson <greg.nmr@gmail.com>
This commit is contained in:
parent
b10cefb160
commit
01c5cd365b
@ -197,7 +197,7 @@
|
||||
"id": "5abb9b93",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Insert the above documents into the database. Embeddings will automatically be generated for each document."
|
||||
"Insert the above documents into the database. Embeddings will automatically be generated for each document. You can adjust the chunk_size based on the amount of documents you have. The default is 500 but lowering it may be necessary."
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -208,7 +208,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"vector_store = SupabaseVectorStore.from_documents(docs, embeddings, client=supabase, table_name=\"documents\", query_name=\"match_documents\")"
|
||||
"vector_store = SupabaseVectorStore.from_documents(docs, embeddings, client=supabase, table_name=\"documents\", query_name=\"match_documents\", chunk_size=500)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -61,6 +61,7 @@ class SupabaseVectorStore(VectorStore):
|
||||
client=supabase_client,
|
||||
table_name="documents",
|
||||
query_name="match_documents",
|
||||
chunk_size=500,
|
||||
)
|
||||
|
||||
To load from an existing table:
|
||||
@ -88,6 +89,7 @@ class SupabaseVectorStore(VectorStore):
|
||||
client: supabase.client.Client,
|
||||
embedding: Embeddings,
|
||||
table_name: str,
|
||||
chunk_size: int = 500,
|
||||
query_name: Union[str, None] = None,
|
||||
) -> None:
|
||||
"""Initialize with supabase client."""
|
||||
@ -103,6 +105,9 @@ class SupabaseVectorStore(VectorStore):
|
||||
self._embedding: Embeddings = embedding
|
||||
self.table_name = table_name or "documents"
|
||||
self.query_name = query_name or "match_documents"
|
||||
self.chunk_size = chunk_size or 500
|
||||
# According to the SupabaseVectorStore JS implementation, the best chunk size
|
||||
# is 500. Though for large datasets it can be too large so it is configurable.
|
||||
|
||||
@property
|
||||
def embeddings(self) -> Embeddings:
|
||||
@ -130,6 +135,7 @@ class SupabaseVectorStore(VectorStore):
|
||||
client: Optional[supabase.client.Client] = None,
|
||||
table_name: Optional[str] = "documents",
|
||||
query_name: Union[str, None] = "match_documents",
|
||||
chunk_size: int = 500,
|
||||
ids: Optional[List[str]] = None,
|
||||
**kwargs: Any,
|
||||
) -> "SupabaseVectorStore":
|
||||
@ -144,13 +150,14 @@ class SupabaseVectorStore(VectorStore):
|
||||
embeddings = embedding.embed_documents(texts)
|
||||
ids = [str(uuid.uuid4()) for _ in texts]
|
||||
docs = cls._texts_to_documents(texts, metadatas)
|
||||
cls._add_vectors(client, table_name, embeddings, docs, ids)
|
||||
cls._add_vectors(client, table_name, embeddings, docs, ids, chunk_size)
|
||||
|
||||
return cls(
|
||||
client=client,
|
||||
embedding=embedding,
|
||||
table_name=table_name,
|
||||
query_name=query_name,
|
||||
chunk_size=chunk_size,
|
||||
)
|
||||
|
||||
def add_vectors(
|
||||
@ -159,7 +166,9 @@ class SupabaseVectorStore(VectorStore):
|
||||
documents: List[Document],
|
||||
ids: List[str],
|
||||
) -> List[str]:
|
||||
return self._add_vectors(self._client, self.table_name, vectors, documents, ids)
|
||||
return self._add_vectors(
|
||||
self._client, self.table_name, vectors, documents, ids, self.chunk_size
|
||||
)
|
||||
|
||||
def similarity_search(
|
||||
self,
|
||||
@ -300,6 +309,7 @@ class SupabaseVectorStore(VectorStore):
|
||||
vectors: List[List[float]],
|
||||
documents: List[Document],
|
||||
ids: List[str],
|
||||
chunk_size: int,
|
||||
) -> List[str]:
|
||||
"""Add vectors to Supabase table."""
|
||||
|
||||
@ -313,9 +323,6 @@ class SupabaseVectorStore(VectorStore):
|
||||
for idx, embedding in enumerate(vectors)
|
||||
]
|
||||
|
||||
# According to the SupabaseVectorStore JS implementation, the best chunk size
|
||||
# is 500
|
||||
chunk_size = 500
|
||||
id_list: List[str] = []
|
||||
for i in range(0, len(rows), chunk_size):
|
||||
chunk = rows[i : i + chunk_size]
|
||||
|
@ -3,7 +3,7 @@
|
||||
import sys
|
||||
|
||||
|
||||
def main():
|
||||
def main() -> int:
|
||||
print("Hello World!")
|
||||
|
||||
return 0
|
||||
|
Loading…
Reference in New Issue
Block a user