update: milvus batch

2025-08-19 08:47:32 +00:00 · 2023-05-25 17:17:02 +08:00 · 2023-05-25 17:17:02 +08:00 · adb8a5a316
commit adb8a5a316
parent 85906a3c45
2 changed files with 9 additions and 8 deletions
--- a/pilot/source_embedding/source_embedding.py
+++ b/pilot/source_embedding/source_embedding.py
@@ -2,9 +2,6 @@
 # -*- coding: utf-8 -*-
 from abc import ABC, abstractmethod
 from typing import Dict, List, Optional
-
-from langchain.embeddings import HuggingFaceEmbeddings
-
 from pilot.configs.config import Config
 from pilot.vector_store.connector import VectorStoreConnector

@@ -35,9 +32,7 @@ class SourceEmbedding(ABC):
        self.model_name = model_name
        self.vector_store_config = vector_store_config
        self.embedding_args = embedding_args
-        self.embeddings = HuggingFaceEmbeddings(model_name=self.model_name)
-
-        vector_store_config["embeddings"] = self.embeddings
+        self.embeddings = vector_store_config["embeddings"]
        self.vector_client = VectorStoreConnector(
            CFG.VECTOR_STORE_TYPE, vector_store_config
        )
--- a/pilot/vector_store/milvus_store.py
+++ b/pilot/vector_store/milvus_store.py
@@ -6,6 +6,7 @@ from pymilvus import Collection, DataType, connections, utility
 from pilot.configs.config import Config
 from pilot.vector_store.vector_store_base import VectorStoreBase

+
 CFG = Config()


@@ -107,6 +108,7 @@ class MilvusStore(VectorStoreBase):
            self.col = Collection(
                self.collection_name, using=self.alias
            )
+            self.fields = []
            for x in self.col.schema.fields:
                self.fields.append(x.name)
                if x.auto_id:
@@ -131,7 +133,7 @@ class MilvusStore(VectorStoreBase):
            max_length = max(max_length, len(y))
        # Create the text field
        fields.append(
-            FieldSchema(text_field, DataType.VARCHAR, max_length=max_length + 1)
+            FieldSchema(text_field, DataType.VARCHAR, max_length=max_length + 100)
        )
        # primary key field
        fields.append(
@@ -248,7 +250,11 @@ class MilvusStore(VectorStoreBase):

    def load_document(self, documents) -> None:
        """load document in vector database."""
-        self.init_schema_and_load(self.collection_name, documents)
+        batch_size = 500
+        batched_list = [documents[i:i + batch_size] for i in range(0, len(documents), batch_size)]
+        # docs = []
+        for doc_batch in batched_list:
+            self.init_schema_and_load(self.collection_name, doc_batch)

    def similar_search(self, text, topk) -> None:
        """similar_search in vector database."""