fix(VectorStore): fix task concurrency and batch processing

- Refactor task execution to improve concurrency control
- Implement batch processing for vector deletion in Chroma store
2025-09-09 12:59:43 +00:00 · 2025-05-06 23:23:40 +08:00
parent 3a00aca113
commit 4eb06299e8
2 changed files with 28 additions and 11 deletions
--- a/packages/dbgpt-core/src/dbgpt/storage/base.py
+++ b/packages/dbgpt-core/src/dbgpt/storage/base.py
@@ -1,5 +1,5 @@
 """Index store base class."""
-
+import asyncio
 import logging
 import time
 from abc import ABC, abstractmethod
@@ -182,9 +182,7 @@ class IndexStoreBase(ABC):
        for chunk_group in chunk_groups:
            tasks.append(self.aload_document(chunk_group))

-        import asyncio
-
-        results = await asyncio.gather(*tasks)
+        results = await self._run_tasks_with_concurrency(tasks, max_threads)

        ids = []
        loaded_cnt = 0
@@ -194,7 +192,13 @@ class IndexStoreBase(ABC):
            logger.info(f"Loaded {loaded_cnt} chunks, total {len(chunks)} chunks.")

        return ids
-
+    async def _run_tasks_with_concurrency(self, tasks, max_concurrent):
+        results = []
+        for i in range(0, len(tasks), max_concurrent):
+            batch = tasks[i:i + max_concurrent]
+            batch_results = await asyncio.gather(*batch, return_exceptions=True)
+            results.extend(batch_results)
+        return results
    def similar_search(
        self, text: str, topk: int, filters: Optional[MetadataFilters] = None
    ) -> List[Chunk]: