feature: add knowledge embedding

This commit is contained in:
aries-ckt
2023-05-15 22:12:50 +08:00
parent 3c795154b2
commit ce4c3e823d
8 changed files with 88 additions and 38 deletions

View File

@@ -4,17 +4,31 @@ from pilot.source_embedding.pdf_embedding import PDFEmbedding
class KnowledgeEmbedding:
    """Select a source-embedding client for a file and delegate calls to it.

    The client class is chosen from the extension of ``file_path`` when the
    instance is constructed. ``knowledge_embedding`` and ``similar_search``
    forward to that client.
    """

    def __init__(self, file_path: str, model_name, vector_store_config):
        """Initialize with Loader url, model_name, vector_store_config.

        Args:
            file_path: Path of the knowledge source; its extension selects
                the embedding client.
            model_name: Passed through unchanged to the embedding client.
            vector_store_config: Passed through unchanged to the embedding
                client.
        """
        self.file_path = file_path
        self.model_name = model_name
        self.vector_store_config = vector_store_config
        self.vector_store_type = "default"
        # Built eagerly so that every later call can simply delegate.
        self.knowledge_embedding_client = self.init_knowledge_embedding()

    def knowledge_embedding(self):
        """Run ``source_embedding()`` on the selected client."""
        self.knowledge_embedding_client.source_embedding()

    def init_knowledge_embedding(self):
        """Build the embedding client that matches ``self.file_path``.

        ``.pdf``, ``.md`` and ``.csv`` (matched case-insensitively) map to
        ``PDFEmbedding``, ``MarkdownEmbedding`` and ``CSVEmbedding``. Any
        other path falls back to ``MarkdownEmbedding`` while
        ``vector_store_type`` is ``"default"``.

        Returns:
            The constructed embedding client.

        Raises:
            ValueError: If the extension is not recognised and
                ``vector_store_type`` is not ``"default"``.
        """
        # Compare on a lower-cased copy so "REPORT.PDF" is not mistaken for
        # an unknown type; the original path is what the client receives.
        lowered_path = self.file_path.lower()

        if lowered_path.endswith(".pdf"):
            embedding_cls = PDFEmbedding
        elif lowered_path.endswith(".md"):
            embedding_cls = MarkdownEmbedding
        elif lowered_path.endswith(".csv"):
            embedding_cls = CSVEmbedding
        elif self.vector_store_type == "default":
            embedding_cls = MarkdownEmbedding
        else:
            # Previously this path left the result unbound and surfaced as
            # an UnboundLocalError; fail with an explicit message instead.
            raise ValueError(
                f"Unsupported knowledge source {self.file_path!r} for "
                f"vector_store_type {self.vector_store_type!r}"
            )

        return embedding_cls(
            file_path=self.file_path,
            model_name=self.model_name,
            vector_store_config=self.vector_store_config,
        )

    def similar_search(self, text, topk):
        """Return the client's ``similar_search(text, topk)`` result."""
        return self.knowledge_embedding_client.similar_search(text, topk)