Files
DB-GPT/tools/knowlege_init.py
aries_ckt 821fcaacdf fix:bind knowledge_init with space
bind knowledge_init with space
2023-06-30 11:31:47 +08:00

64 lines
2.3 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import argparse
import os
import sys

# Make the parent of this script's directory importable BEFORE any ``pilot``
# import runs. Previously three ``pilot`` imports were executed ahead of this
# line, so launching the script directly failed with ModuleNotFoundError
# unless PYTHONPATH had been set by hand.
# NOTE(review): presumably the ``pilot`` package lives in that parent
# directory -- confirm against the repository layout.
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))

from pilot.embedding_engine.knowledge_type import KnowledgeType
from pilot.openapi.knowledge.knowledge_service import KnowledgeService
from pilot.openapi.knowledge.request.knowledge_request import KnowledgeSpaceRequest
from pilot.configs.config import Config
from pilot.configs.model_config import (
    DATASETS_DIR,
    LLM_MODEL_CONFIG,
)
from pilot.embedding_engine.knowledge_embedding import KnowledgeEmbedding

# Module-level singletons shared by everything below: the service used to
# register a knowledge space, and the project configuration object.
knowledge_space_service = KnowledgeService()
CFG = Config()
class LocalKnowledgeInit:
    """Embed every file under a directory and register a knowledge space.

    The vector store is identified by ``vector_store_config``; the embedding
    model is looked up in ``LLM_MODEL_CONFIG`` using ``CFG.EMBEDDING_MODEL``.
    """

    # Class-level placeholder; nothing in this class reads or writes it.
    embeddings: object = None

    def __init__(self, vector_store_config) -> None:
        """Store the vector store configuration and resolve the model name.

        Args:
            vector_store_config: Mapping that must contain the key
                ``"vector_store_name"``; it is also forwarded unchanged to
                ``KnowledgeEmbedding``.
        """
        self.vector_store_config = vector_store_config
        self.model_name = LLM_MODEL_CONFIG[CFG.EMBEDDING_MODEL]

    def knowledge_persist(self, file_path):
        """Embed all files below ``file_path``, then create the knowledge space.

        Args:
            file_path: Root directory that is walked recursively (bottom-up);
                every file found is embedded as a ``DOCUMENT`` knowledge type.
        """
        for root, _, files in os.walk(file_path, topdown=False):
            for file in files:
                filename = os.path.join(root, file)
                ke = KnowledgeEmbedding(
                    knowledge_source=filename,
                    knowledge_type=KnowledgeType.DOCUMENT.value,
                    model_name=self.model_name,
                    vector_store_config=self.vector_store_config,
                )
                client = ke.init_knowledge_embedding()
                client.source_embedding()

        space_name = self.vector_store_config["vector_store_name"]
        print(f"""begin create {space_name} space""")
        # Build a real request INSTANCE. The previous code bound the
        # KnowledgeSpaceRequest class itself and assigned attributes on it,
        # which mutated shared class state and handed the service a class
        # object instead of a request.
        # NOTE(review): assumes KnowledgeSpaceRequest accepts name/desc/owner
        # as keyword arguments (e.g. a pydantic model) -- confirm.
        space = KnowledgeSpaceRequest(
            name=space_name,
            desc="",
            owner="knowledge_init.py",
        )
        knowledge_space_service.create_knowledge_space(space)
if __name__ == "__main__":
    # Command-line entry point: embed everything under DATASETS_DIR into the
    # vector store named by --vector_name (defaults to "default").
    parser = argparse.ArgumentParser()
    parser.add_argument("--vector_name", type=str, default="default")
    args = parser.parse_args()
    vector_name = args.vector_name
    # The unused ``store_type = CFG.VECTOR_STORE_TYPE`` assignment was removed:
    # its value was never read anywhere in this script.
    vector_store_config = {"vector_store_name": vector_name}
    print(vector_store_config)
    kv = LocalKnowledgeInit(vector_store_config=vector_store_config)
    kv.knowledge_persist(file_path=DATASETS_DIR)
    print("your knowledge embedding success...")