diff --git a/pilot/embedding_engine/source_embedding.py b/pilot/embedding_engine/source_embedding.py index f8f9458bc..64e422079 100644 --- a/pilot/embedding_engine/source_embedding.py +++ b/pilot/embedding_engine/source_embedding.py @@ -51,21 +51,33 @@ class SourceEmbedding(ABC): @register def data_process(self, text): - """pre process data.""" + """pre process data. + Args: + - text: raw text + """ @register def text_splitter(self, text_splitter: TextSplitter): - """add text split chunk""" + """add text split chunk + Args: + - text_splitter: TextSplitter + """ pass @register def text_to_vector(self, docs): - """transform vector""" + """transform vector + Args: + - docs: List[Document] + """ pass @register def index_to_store(self, docs): - """index to vector store""" + """index to vector store + Args: + - docs: List[Document] + """ self.vector_client = VectorStoreConnector( self.vector_store_config["vector_store_type"], self.vector_store_config ) @@ -73,7 +85,10 @@ class SourceEmbedding(ABC): @register def similar_search(self, doc, topk): - """vector store similarity_search""" + """vector store similarity_search + Args: + - doc: query text + """ self.vector_client = VectorStoreConnector( self.vector_store_config["vector_store_type"], self.vector_store_config ) @@ -89,6 +104,7 @@ class SourceEmbedding(ABC): return self.vector_client.vector_name_exists() def source_embedding(self): + """read()->data_process()->text_splitter()->index_to_store()""" if "read" in registered_methods: text = self.read() if "data_process" in registered_methods: diff --git a/pilot/vector_store/connector.py b/pilot/vector_store/connector.py index b174d7289..1bf86082f 100644 --- a/pilot/vector_store/connector.py +++ b/pilot/vector_store/connector.py @@ -34,13 +34,13 @@ class VectorStoreConnector: """load document in vector database.""" return self.client.load_document(docs) - def similar_search(self, query: str, topk: int): + def similar_search(self, doc: str, topk: int): """similar search in vector 
database. Args: - - query: query text + - doc: query text - topk: topk """ - return self.client.similar_search(query, topk) + return self.client.similar_search(doc, topk) def vector_name_exists(self): """is vector store name exist."""