feature: knowledge embedding supports automatic file path adaptation

This commit is contained in:
aries-ckt
2023-06-05 16:26:19 +08:00
parent 31d457cfd5
commit be1a792d3c
16 changed files with 140 additions and 82 deletions

View File

@@ -19,36 +19,32 @@ CFG = Config()
# Wraps KnowledgeEmbedding to persist local knowledge files into a vector store
# and to query that store.
# NOTE(review): this text comes from a rendered diff with indentation stripped;
# removed and added lines appear interleaved without +/- markers, so some
# statements below are alternatives of each other rather than sequential code.
class LocalKnowledgeInit:
embeddings: object = None
# The "text2vec" entry of LLM_MODEL_CONFIG; passed as model_name to KnowledgeEmbedding.
model_name = LLM_MODEL_CONFIG["text2vec"]
# Passed as k to similarity_search_with_score() in query().
top_k: int = VECTOR_SEARCH_TOP_K
# Keep the vector store configuration on the instance; it is forwarded
# unchanged to every KnowledgeEmbedding created below.
def __init__(self, vector_store_config) -> None:
self.vector_store_config = vector_store_config
# Same value as the class-level model_name above (presumably the added side
# of the diff, replacing the class attribute - TODO confirm).
self.model_name = LLM_MODEL_CONFIG["text2vec"]
def knowledge_persist(self, file_path, append_mode):
"""knowledge persist"""
# Variant A (presumably the removed side of the diff - TODO confirm):
# build a single KnowledgeEmbedding for file_path and return whatever
# knowledge_persist_initialization(append_mode) returns.
kv = KnowledgeEmbedding(
file_path=file_path,
model_name=LLM_MODEL_CONFIG["text2vec"],
vector_store_config=self.vector_store_config,
)
vector_store = kv.knowledge_persist_initialization(append_mode)
return vector_store
# Variant B (presumably the added side of the diff - TODO confirm):
# walk file_path with os.walk (topdown=False visits subdirectories before
# their parents) and embed each file individually. These lines never read
# append_mode and return nothing.
for root, _, files in os.walk(file_path, topdown=False):
for file in files:
filename = os.path.join(root, file)
# docs = self._load_file(filename)
ke = KnowledgeEmbedding(
file_path=filename,
model_name=self.model_name,
vector_store_config=self.vector_store_config,
)
client = ke.init_knowledge_embedding()
client.source_embedding()
def query(self, q):
"""Query similar doc from Vector"""
# NOTE(review): init_vector_store() is not defined in the visible part of
# this class - verify it exists elsewhere.
vector_store = self.init_vector_store()
docs = vector_store.similarity_search_with_score(q, k=self.top_k)
# Each result is unpacked as a pair (dc, s) and yielded in swapped order
# (s, dc); presumably s is the similarity score and dc the document.
for doc in docs:
dc, s = doc
yield s, dc
# Script entry point: parse the command-line options used to build the vector
# store configuration.
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--vector_name", type=str, default="default")
# NOTE(review): with type=bool argparse calls bool() on the raw string, so any
# non-empty value (including "False") is parsed as True; only an empty string
# or omitting the flag yields False.
parser.add_argument("--append", type=bool, default=False)
# --store_type is accepted here but is not read in the visible lines.
parser.add_argument("--store_type", type=str, default="Chroma")
args = parser.parse_args()
vector_name = args.vector_name
append_mode = args.append
@@ -56,5 +52,5 @@ if __name__ == "__main__":
# Build the vector store configuration from the chosen name, then embed the
# contents of DATASETS_DIR through LocalKnowledgeInit.
vector_store_config = {"vector_store_name": vector_name}
print(vector_store_config)
kv = LocalKnowledgeInit(vector_store_config=vector_store_config)
# NOTE(review): the next two statements look like the removed and added sides
# of a single diff line (the second drops the unused vector_store binding);
# presumably only one of them is meant to run - TODO confirm.
vector_store = kv.knowledge_persist(file_path=DATASETS_DIR, append_mode=append_mode)
kv.knowledge_persist(file_path=DATASETS_DIR, append_mode=append_mode)
print("your knowledge embedding success...")