fix(VectorStore): fix task concurrency and batch processing

- Refactor task execution to improve concurrency control
- Implement batch processing for vector deletion in Chroma store
This commit is contained in:
tam
2025-05-06 23:23:40 +08:00
parent 3a00aca113
commit 4eb06299e8
2 changed files with 28 additions and 11 deletions

View File

@@ -1,5 +1,5 @@
"""Index store base class."""
import asyncio
import logging
import time
from abc import ABC, abstractmethod
@@ -182,9 +182,7 @@ class IndexStoreBase(ABC):
for chunk_group in chunk_groups:
tasks.append(self.aload_document(chunk_group))
import asyncio
results = await asyncio.gather(*tasks)
results = await self._run_tasks_with_concurrency(tasks, max_threads)
ids = []
loaded_cnt = 0
@@ -194,7 +192,13 @@ class IndexStoreBase(ABC):
logger.info(f"Loaded {loaded_cnt} chunks, total {len(chunks)} chunks.")
return ids
async def _run_tasks_with_concurrency(self, tasks, max_concurrent):
    """Await ``tasks`` in consecutive batches of at most ``max_concurrent``.

    Every batch is awaited with ``asyncio.gather(..., return_exceptions=True)``,
    so a failing awaitable does not abort the run: the exception object is
    stored in the result list at that awaitable's position instead of being
    raised. A batch only starts once the whole previous batch has finished.

    Args:
        tasks: Sliceable sequence of awaitables (e.g. coroutine objects).
        max_concurrent: Upper bound on the number of awaitables in flight at
            the same time. ``None`` or a value below 1 falls back to 1, i.e.
            the awaitables are run one after another.

    Returns:
        A list with one entry per awaitable, in input order. Each entry is
        either the awaitable's result or the exception it raised.
    """
    # The batch size is used as the ``range`` step: a zero step raises and a
    # negative step yields an empty range, which would return ``[]`` without
    # ever awaiting the tasks. Fall back to sequential execution instead.
    if max_concurrent is None or max_concurrent < 1:
        batch_size = 1
    else:
        batch_size = max_concurrent

    results = []
    for start in range(0, len(tasks), batch_size):
        batch = tasks[start:start + batch_size]
        results.extend(await asyncio.gather(*batch, return_exceptions=True))
    return results
def similar_search(
self, text: str, topk: int, filters: Optional[MetadataFilters] = None
) -> List[Chunk]: