mirror of
https://github.com/csunny/DB-GPT.git
synced 2025-08-05 18:33:52 +00:00
fix: knowledge_init.py cannot search answers across multiple documents (#287)
1. Rebuild knowledge_init.py. 2. Change the VectorConnector position. Closes #285
This commit is contained in:
commit
c04d8e0bbf
@ -51,6 +51,7 @@ class KnowledgeEmbedding:
|
|||||||
self.knowledge_embedding_client.index_to_store(docs)
|
self.knowledge_embedding_client.index_to_store(docs)
|
||||||
|
|
||||||
def read(self):
|
def read(self):
|
||||||
|
self.knowledge_embedding_client = self.init_knowledge_embedding()
|
||||||
return self.knowledge_embedding_client.read_batch()
|
return self.knowledge_embedding_client.read_batch()
|
||||||
|
|
||||||
def init_knowledge_embedding(self):
|
def init_knowledge_embedding(self):
|
||||||
|
@ -33,9 +33,6 @@ class SourceEmbedding(ABC):
|
|||||||
self.vector_store_config = vector_store_config
|
self.vector_store_config = vector_store_config
|
||||||
self.embedding_args = embedding_args
|
self.embedding_args = embedding_args
|
||||||
self.embeddings = vector_store_config["embeddings"]
|
self.embeddings = vector_store_config["embeddings"]
|
||||||
self.vector_client = VectorStoreConnector(
|
|
||||||
CFG.VECTOR_STORE_TYPE, vector_store_config
|
|
||||||
)
|
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
@register
|
@register
|
||||||
@ -59,11 +56,17 @@ class SourceEmbedding(ABC):
|
|||||||
@register
|
@register
|
||||||
def index_to_store(self, docs):
|
def index_to_store(self, docs):
|
||||||
"""index to vector store"""
|
"""index to vector store"""
|
||||||
|
self.vector_client = VectorStoreConnector(
|
||||||
|
CFG.VECTOR_STORE_TYPE, self.vector_store_config
|
||||||
|
)
|
||||||
self.vector_client.load_document(docs)
|
self.vector_client.load_document(docs)
|
||||||
|
|
||||||
@register
|
@register
|
||||||
def similar_search(self, doc, topk):
|
def similar_search(self, doc, topk):
|
||||||
"""vector store similarity_search"""
|
"""vector store similarity_search"""
|
||||||
|
self.vector_client = VectorStoreConnector(
|
||||||
|
CFG.VECTOR_STORE_TYPE, self.vector_store_config
|
||||||
|
)
|
||||||
try:
|
try:
|
||||||
ans = self.vector_client.similar_search(doc, topk)
|
ans = self.vector_client.similar_search(doc, topk)
|
||||||
except NotEnoughElementsException:
|
except NotEnoughElementsException:
|
||||||
@ -71,6 +74,9 @@ class SourceEmbedding(ABC):
|
|||||||
return ans
|
return ans
|
||||||
|
|
||||||
def vector_name_exist(self):
|
def vector_name_exist(self):
|
||||||
|
self.vector_client = VectorStoreConnector(
|
||||||
|
CFG.VECTOR_STORE_TYPE, self.vector_store_config
|
||||||
|
)
|
||||||
return self.vector_client.vector_name_exists()
|
return self.vector_client.vector_name_exists()
|
||||||
|
|
||||||
def source_embedding(self):
|
def source_embedding(self):
|
||||||
|
@ -25,17 +25,20 @@ class LocalKnowledgeInit:
|
|||||||
|
|
||||||
def knowledge_persist(self, file_path):
|
def knowledge_persist(self, file_path):
|
||||||
"""knowledge persist"""
|
"""knowledge persist"""
|
||||||
|
docs = []
|
||||||
|
embedding_engine = None
|
||||||
for root, _, files in os.walk(file_path, topdown=False):
|
for root, _, files in os.walk(file_path, topdown=False):
|
||||||
for file in files:
|
for file in files:
|
||||||
filename = os.path.join(root, file)
|
filename = os.path.join(root, file)
|
||||||
# docs = self._load_file(filename)
|
|
||||||
ke = KnowledgeEmbedding(
|
ke = KnowledgeEmbedding(
|
||||||
file_path=filename,
|
file_path=filename,
|
||||||
model_name=self.model_name,
|
model_name=self.model_name,
|
||||||
vector_store_config=self.vector_store_config,
|
vector_store_config=self.vector_store_config,
|
||||||
)
|
)
|
||||||
client = ke.init_knowledge_embedding()
|
embedding_engine = ke.init_knowledge_embedding()
|
||||||
client.source_embedding()
|
doc = ke.read()
|
||||||
|
docs.extend(doc)
|
||||||
|
embedding_engine.index_to_store(docs)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
Loading…
Reference in New Issue
Block a user