fix: knowledge_init.py multi-document search cannot find answer (#287)

1. Rebuild knowledge_init.py
2. Change the VectorStoreConnector position
Close #285
This commit is contained in:
magic.chen 2023-06-30 15:05:42 +08:00 committed by GitHub
commit c04d8e0bbf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 16 additions and 6 deletions

View File

@ -51,6 +51,7 @@ class KnowledgeEmbedding:
self.knowledge_embedding_client.index_to_store(docs) self.knowledge_embedding_client.index_to_store(docs)
def read(self): def read(self):
self.knowledge_embedding_client = self.init_knowledge_embedding()
return self.knowledge_embedding_client.read_batch() return self.knowledge_embedding_client.read_batch()
def init_knowledge_embedding(self): def init_knowledge_embedding(self):

View File

@ -33,9 +33,6 @@ class SourceEmbedding(ABC):
self.vector_store_config = vector_store_config self.vector_store_config = vector_store_config
self.embedding_args = embedding_args self.embedding_args = embedding_args
self.embeddings = vector_store_config["embeddings"] self.embeddings = vector_store_config["embeddings"]
self.vector_client = VectorStoreConnector(
CFG.VECTOR_STORE_TYPE, vector_store_config
)
@abstractmethod @abstractmethod
@register @register
@ -59,11 +56,17 @@ class SourceEmbedding(ABC):
@register @register
def index_to_store(self, docs): def index_to_store(self, docs):
"""index to vector store""" """index to vector store"""
self.vector_client = VectorStoreConnector(
CFG.VECTOR_STORE_TYPE, self.vector_store_config
)
self.vector_client.load_document(docs) self.vector_client.load_document(docs)
@register @register
def similar_search(self, doc, topk): def similar_search(self, doc, topk):
"""vector store similarity_search""" """vector store similarity_search"""
self.vector_client = VectorStoreConnector(
CFG.VECTOR_STORE_TYPE, self.vector_store_config
)
try: try:
ans = self.vector_client.similar_search(doc, topk) ans = self.vector_client.similar_search(doc, topk)
except NotEnoughElementsException: except NotEnoughElementsException:
@ -71,6 +74,9 @@ class SourceEmbedding(ABC):
return ans return ans
def vector_name_exist(self): def vector_name_exist(self):
self.vector_client = VectorStoreConnector(
CFG.VECTOR_STORE_TYPE, self.vector_store_config
)
return self.vector_client.vector_name_exists() return self.vector_client.vector_name_exists()
def source_embedding(self): def source_embedding(self):

View File

@ -25,17 +25,20 @@ class LocalKnowledgeInit:
def knowledge_persist(self, file_path): def knowledge_persist(self, file_path):
"""knowledge persist""" """knowledge persist"""
docs = []
embedding_engine = None
for root, _, files in os.walk(file_path, topdown=False): for root, _, files in os.walk(file_path, topdown=False):
for file in files: for file in files:
filename = os.path.join(root, file) filename = os.path.join(root, file)
# docs = self._load_file(filename)
ke = KnowledgeEmbedding( ke = KnowledgeEmbedding(
file_path=filename, file_path=filename,
model_name=self.model_name, model_name=self.model_name,
vector_store_config=self.vector_store_config, vector_store_config=self.vector_store_config,
) )
client = ke.init_knowledge_embedding() embedding_engine = ke.init_knowledge_embedding()
client.source_embedding() doc = ke.read()
docs.extend(doc)
embedding_engine.index_to_store(docs)
if __name__ == "__main__": if __name__ == "__main__":