mirror of
https://github.com/csunny/DB-GPT.git
synced 2025-09-16 14:40:56 +00:00
feat: Add dbgpt client and add api v2
This commit is contained in:
@@ -4,7 +4,7 @@ import shutil
|
||||
import tempfile
|
||||
from typing import List
|
||||
|
||||
from fastapi import APIRouter, File, Form, UploadFile
|
||||
from fastapi import APIRouter, Depends, File, Form, UploadFile
|
||||
|
||||
from dbgpt._private.config import Config
|
||||
from dbgpt.app.knowledge.request.request import (
|
||||
@@ -16,7 +16,6 @@ from dbgpt.app.knowledge.request.request import (
|
||||
KnowledgeDocumentRequest,
|
||||
KnowledgeQueryRequest,
|
||||
KnowledgeSpaceRequest,
|
||||
KnowledgeSyncRequest,
|
||||
SpaceArgumentRequest,
|
||||
)
|
||||
from dbgpt.app.knowledge.request.response import KnowledgeQueryResponse
|
||||
@@ -31,6 +30,8 @@ from dbgpt.rag.embedding.embedding_factory import EmbeddingFactory
|
||||
from dbgpt.rag.knowledge.base import ChunkStrategy
|
||||
from dbgpt.rag.knowledge.factory import KnowledgeFactory
|
||||
from dbgpt.rag.retriever.embedding import EmbeddingRetriever
|
||||
from dbgpt.serve.rag.api.schemas import KnowledgeSyncRequest
|
||||
from dbgpt.serve.rag.service.service import Service
|
||||
from dbgpt.storage.vector_store.base import VectorStoreConfig
|
||||
from dbgpt.storage.vector_store.connector import VectorStoreConnector
|
||||
from dbgpt.util.tracer import SpanType, root_tracer
|
||||
@@ -44,6 +45,11 @@ router = APIRouter()
|
||||
knowledge_space_service = KnowledgeService()
|
||||
|
||||
|
||||
def get_rag_service() -> Service:
    """Return the RAG ``Service`` instance bound to the configured system app.

    Intended for use as a FastAPI dependency (``Depends(get_rag_service)``).
    """
    system_app = CFG.SYSTEM_APP
    return Service.get_instance(system_app)
|
||||
|
||||
|
||||
@router.post("/knowledge/space/add")
|
||||
def space_add(request: KnowledgeSpaceRequest):
|
||||
print(f"/space/add params: {request}")
|
||||
@@ -226,12 +232,20 @@ def document_sync(space_name: str, request: DocumentSyncRequest):
|
||||
|
||||
|
||||
@router.post("/knowledge/{space_name}/document/sync_batch")
def batch_document_sync(
    space_name: str,
    request: List[KnowledgeSyncRequest],
    service: Service = Depends(get_rag_service),
):
    """Sync a batch of documents that belong to the space ``space_name``.

    Args:
        space_name: Name of the knowledge space, taken from the URL path.
        request: One sync request per document. Each request is stamped with
            the id of the resolved space before it is handed to the service.
        service: RAG service, injected through ``get_rag_service``.

    Returns:
        ``Result.succ({"tasks": doc_ids})`` on success, otherwise
        ``Result.failed`` with code ``E000X`` and the error text.
    """
    logger.info(f"Received params: {space_name}, {request}")
    try:
        space = service.get({"name": space_name})
        if space is None:
            # NOTE(review): assumes ``service.get`` can return None for an
            # unknown space -- confirm. Raising here yields a readable error
            # instead of "'NoneType' object has no attribute 'id'".
            raise ValueError(f"knowledge space {space_name} not found")
        for sync_request in request:
            sync_request.space_id = space.id
        # The RAG service is the only sync path; calling the legacy
        # ``knowledge_space_service.batch_document_sync`` as well would
        # process every document twice.
        doc_ids = service.sync_document(requests=request)
        return Result.succ({"tasks": doc_ids})
    except Exception as e:
        return Result.failed(code="E000X", msg=f"document sync error {e}")
|
||||
|
@@ -1,9 +1,11 @@
|
||||
from datetime import datetime
|
||||
from typing import List
|
||||
from typing import Any, Dict, List, Union
|
||||
|
||||
from sqlalchemy import Column, DateTime, Integer, String, Text, func
|
||||
|
||||
from dbgpt._private.config import Config
|
||||
from dbgpt.serve.conversation.api.schemas import ServeRequest
|
||||
from dbgpt.serve.rag.api.schemas import DocumentServeRequest, DocumentServeResponse
|
||||
from dbgpt.storage.metadata import BaseDao, Model
|
||||
|
||||
CFG = Config()
|
||||
@@ -218,3 +220,70 @@ class KnowledgeDocumentDao(BaseDao):
|
||||
knowledge_documents.delete()
|
||||
session.commit()
|
||||
session.close()
|
||||
|
||||
def from_request(
    self, request: Union[ServeRequest, Dict[str, Any]]
) -> KnowledgeDocumentEntity:
    """Convert a request into a ``KnowledgeDocumentEntity``.

    Args:
        request: Either a request model exposing ``.dict()`` or a plain
            dict whose keys are entity field names.

    Returns:
        KnowledgeDocumentEntity: A new entity built from the request fields.
    """
    # Only real dicts are passed through untouched; every request model is
    # converted with ``.dict()``. Checking for one specific model class would
    # let any other model reach ``**request`` and fail with a TypeError.
    request_dict = request if isinstance(request, dict) else request.dict()
    return KnowledgeDocumentEntity(**request_dict)
|
||||
|
||||
def to_request(self, entity: KnowledgeDocumentEntity) -> DocumentServeResponse:
    """Convert the entity to its serving representation.

    Despite the name, this returns a ``DocumentServeResponse`` carrying the
    same fields as ``to_response``; it delegates there so the field list is
    maintained in one place.

    Args:
        entity (KnowledgeDocumentEntity): The entity to convert.

    Returns:
        DocumentServeResponse: The converted entity.
    """
    return self.to_response(entity)
|
||||
|
||||
def to_response(self, entity: KnowledgeDocumentEntity) -> DocumentServeResponse:
    """Convert the entity to a response.

    Args:
        entity (KnowledgeDocumentEntity): The entity to convert.

    Returns:
        DocumentServeResponse: A response populated field by field from the
        entity.
    """
    # Attributes copied one-to-one from the entity onto the response.
    copied_fields = (
        "id",
        "doc_name",
        "doc_type",
        "space",
        "chunk_size",
        "status",
        "last_sync",
        "content",
        "result",
        "vector_ids",
        "summary",
        "gmt_created",
        "gmt_modified",
    )
    payload = {name: getattr(entity, name) for name in copied_fields}
    return DocumentServeResponse(**payload)
|
||||
|
@@ -17,6 +17,8 @@ class KnowledgeQueryRequest(BaseModel):
|
||||
class KnowledgeSpaceRequest(BaseModel):
|
||||
"""name: knowledge space name"""
|
||||
|
||||
"""vector_type: vector type"""
|
||||
id: int = None
|
||||
name: str = None
|
||||
"""vector_type: vector type"""
|
||||
vector_type: str = None
|
||||
@@ -37,9 +39,6 @@ class KnowledgeDocumentRequest(BaseModel):
|
||||
"""content: content"""
|
||||
source: str = None
|
||||
|
||||
"""text_chunk_size: text_chunk_size"""
|
||||
# text_chunk_size: int
|
||||
|
||||
|
||||
class DocumentQueryRequest(BaseModel):
|
||||
"""doc_name: doc path"""
|
||||
@@ -80,18 +79,18 @@ class DocumentSyncRequest(BaseModel):
|
||||
chunk_overlap: Optional[int] = None
|
||||
|
||||
|
||||
class KnowledgeSyncRequest(BaseModel):
    """Sync request"""

    # doc_id: id of the document to sync
    doc_id: int

    # model_name: model name (optional)
    model_name: Optional[str] = None

    # chunk_parameters: chunk parameters
    chunk_parameters: ChunkParameters
|
||||
# class KnowledgeSyncRequest(BaseModel):
|
||||
# """Sync request"""
|
||||
#
|
||||
# """doc_ids: doc ids"""
|
||||
# doc_id: int
|
||||
#
|
||||
# """model_name: model name"""
|
||||
# model_name: Optional[str] = None
|
||||
#
|
||||
# """chunk_parameters: chunk parameters
|
||||
# """
|
||||
# chunk_parameters: ChunkParameters
|
||||
|
||||
|
||||
class ChunkQueryRequest(BaseModel):
|
||||
|
@@ -1,7 +1,6 @@
|
||||
import json
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from typing import List
|
||||
|
||||
from dbgpt._private.config import Config
|
||||
@@ -17,7 +16,6 @@ from dbgpt.app.knowledge.request.request import (
|
||||
DocumentSyncRequest,
|
||||
KnowledgeDocumentRequest,
|
||||
KnowledgeSpaceRequest,
|
||||
KnowledgeSyncRequest,
|
||||
SpaceArgumentRequest,
|
||||
)
|
||||
from dbgpt.app.knowledge.request.response import (
|
||||
@@ -25,7 +23,6 @@ from dbgpt.app.knowledge.request.response import (
|
||||
DocumentQueryResponse,
|
||||
SpaceQueryResponse,
|
||||
)
|
||||
from dbgpt.app.knowledge.space_db import KnowledgeSpaceDao, KnowledgeSpaceEntity
|
||||
from dbgpt.component import ComponentType
|
||||
from dbgpt.configs.model_config import EMBEDDING_MODEL_CONFIG
|
||||
from dbgpt.core import Chunk
|
||||
@@ -38,8 +35,11 @@ from dbgpt.rag.text_splitter.text_splitter import (
|
||||
RecursiveCharacterTextSplitter,
|
||||
SpacyTextSplitter,
|
||||
)
|
||||
from dbgpt.serve.rag.api.schemas import KnowledgeSyncRequest
|
||||
from dbgpt.serve.rag.assembler.embedding import EmbeddingAssembler
|
||||
from dbgpt.serve.rag.assembler.summary import SummaryAssembler
|
||||
from dbgpt.serve.rag.models.models import KnowledgeSpaceDao, KnowledgeSpaceEntity
|
||||
from dbgpt.serve.rag.service.service import Service, SyncStatus
|
||||
from dbgpt.storage.vector_store.base import VectorStoreConfig
|
||||
from dbgpt.storage.vector_store.connector import VectorStoreConnector
|
||||
from dbgpt.util.executor_utils import ExecutorFactory, blocking_func_to_async
|
||||
@@ -53,13 +53,6 @@ logger = logging.getLogger(__name__)
|
||||
CFG = Config()
|
||||
|
||||
|
||||
class SyncStatus(Enum):
    """Sync status values; each member's value is its own name as a string."""

    TODO = "TODO"
    FAILED = "FAILED"
    RUNNING = "RUNNING"
    FINISHED = "FINISHED"
|
||||
|
||||
|
||||
# default summary max iteration call with llm.
|
||||
DEFAULT_SUMMARY_MAX_ITERATION = 5
|
||||
# default summary concurrency call with llm.
|
||||
@@ -88,8 +81,8 @@ class KnowledgeService:
|
||||
spaces = knowledge_space_dao.get_knowledge_space(query)
|
||||
if len(spaces) > 0:
|
||||
raise Exception(f"space name:{request.name} have already named")
|
||||
knowledge_space_dao.create_knowledge_space(request)
|
||||
return True
|
||||
space_id = knowledge_space_dao.create_knowledge_space(request)
|
||||
return space_id
|
||||
|
||||
def create_knowledge_document(self, space, request: KnowledgeDocumentRequest):
|
||||
"""create knowledge document
|
||||
@@ -199,7 +192,9 @@ class KnowledgeService:
|
||||
return res
|
||||
|
||||
def batch_document_sync(
|
||||
self, space_name, sync_requests: List[KnowledgeSyncRequest]
|
||||
self,
|
||||
space_name,
|
||||
sync_requests: List[KnowledgeSyncRequest],
|
||||
) -> List[int]:
|
||||
"""batch sync knowledge document chunk into vector store
|
||||
Args:
|
||||
|
@@ -1,93 +1,93 @@
|
||||
from datetime import datetime
|
||||
|
||||
from sqlalchemy import Column, DateTime, Integer, String, Text
|
||||
|
||||
from dbgpt._private.config import Config
|
||||
from dbgpt.app.knowledge.request.request import KnowledgeSpaceRequest
|
||||
from dbgpt.storage.metadata import BaseDao, Model
|
||||
|
||||
CFG = Config()
|
||||
|
||||
|
||||
class KnowledgeSpaceEntity(Model):
    """ORM mapping for one row of the ``knowledge_space`` table."""

    __tablename__ = "knowledge_space"
    id = Column(Integer, primary_key=True)
    name = Column(String(100))
    vector_type = Column(String(100))
    desc = Column(String(100))
    owner = Column(String(100))
    context = Column(Text)
    gmt_created = Column(DateTime)
    gmt_modified = Column(DateTime)

    def __repr__(self):
        """Return a debug string listing every column value."""
        # A ", " separator is required between the owner and context fields,
        # otherwise the two run together in the output.
        return (
            f"KnowledgeSpaceEntity(id={self.id}, name='{self.name}', "
            f"vector_type='{self.vector_type}', desc='{self.desc}', "
            f"owner='{self.owner}', context='{self.context}', "
            f"gmt_created='{self.gmt_created}', "
            f"gmt_modified='{self.gmt_modified}')"
        )
|
||||
|
||||
|
||||
class KnowledgeSpaceDao(BaseDao):
    """Data-access object for rows of the ``knowledge_space`` table.

    Every method opens its own raw session and closes it in a ``finally``
    block, so the session is released even when a query or commit raises.
    """

    # Entity attributes that ``get_knowledge_space`` filters on when they are
    # set (not None) on the query object.
    _FILTER_FIELDS = (
        "id",
        "name",
        "vector_type",
        "desc",
        "owner",
        "gmt_created",
        "gmt_modified",
    )

    def create_knowledge_space(self, space: KnowledgeSpaceRequest):
        """Insert a new knowledge space.

        Args:
            space: Request carrying ``name``, ``desc`` and ``owner``. The
                vector type is taken from ``CFG.VECTOR_STORE_TYPE``, not from
                the request.

        Returns:
            The primary key of the inserted row.
        """
        session = self.get_raw_session()
        try:
            # One timestamp for both columns so a fresh row has identical
            # created/modified times.
            now = datetime.now()
            knowledge_space = KnowledgeSpaceEntity(
                name=space.name,
                vector_type=CFG.VECTOR_STORE_TYPE,
                desc=space.desc,
                owner=space.owner,
                gmt_created=now,
                gmt_modified=now,
            )
            session.add(knowledge_space)
            session.commit()
            # Read the generated key while the session is still open.
            return knowledge_space.id
        finally:
            session.close()

    def get_knowledge_space(self, query: KnowledgeSpaceEntity):
        """Return the spaces matching every non-None field of ``query``.

        Args:
            query: Entity used as a filter template; attributes left as None
                are ignored.

        Returns:
            A list of ``KnowledgeSpaceEntity`` ordered by ``gmt_created``
            descending.
        """
        session = self.get_raw_session()
        try:
            knowledge_spaces = session.query(KnowledgeSpaceEntity)
            for field in self._FILTER_FIELDS:
                value = getattr(query, field)
                if value is not None:
                    knowledge_spaces = knowledge_spaces.filter(
                        getattr(KnowledgeSpaceEntity, field) == value
                    )
            knowledge_spaces = knowledge_spaces.order_by(
                KnowledgeSpaceEntity.gmt_created.desc()
            )
            return knowledge_spaces.all()
        finally:
            session.close()

    def update_knowledge_space(self, space: KnowledgeSpaceEntity):
        """Merge ``space`` into the database and commit.

        Returns:
            True once the commit has succeeded.
        """
        session = self.get_raw_session()
        try:
            session.merge(space)
            session.commit()
            return True
        finally:
            session.close()

    def delete_knowledge_space(self, space: KnowledgeSpaceEntity):
        """Delete ``space`` and commit; a falsy ``space`` is a no-op."""
        session = self.get_raw_session()
        try:
            if space:
                session.delete(space)
                session.commit()
        finally:
            session.close()
|
||||
# from datetime import datetime
|
||||
#
|
||||
# from sqlalchemy import Column, DateTime, Integer, String, Text
|
||||
#
|
||||
# from dbgpt._private.config import Config
|
||||
# from dbgpt.app.knowledge.request.request import KnowledgeSpaceRequest
|
||||
# from dbgpt.storage.metadata import BaseDao, Model
|
||||
#
|
||||
# CFG = Config()
|
||||
#
|
||||
#
|
||||
# class KnowledgeSpaceEntity(Model):
|
||||
# __tablename__ = "knowledge_space"
|
||||
# id = Column(Integer, primary_key=True)
|
||||
# name = Column(String(100))
|
||||
# vector_type = Column(String(100))
|
||||
# desc = Column(String(100))
|
||||
# owner = Column(String(100))
|
||||
# context = Column(Text)
|
||||
# gmt_created = Column(DateTime)
|
||||
# gmt_modified = Column(DateTime)
|
||||
#
|
||||
# def __repr__(self):
|
||||
# return f"KnowledgeSpaceEntity(id={self.id}, name='{self.name}', vector_type='{self.vector_type}', desc='{self.desc}', owner='{self.owner}' context='{self.context}', gmt_created='{self.gmt_created}', gmt_modified='{self.gmt_modified}')"
|
||||
#
|
||||
#
|
||||
# class KnowledgeSpaceDao(BaseDao):
|
||||
# def create_knowledge_space(self, space: KnowledgeSpaceRequest):
|
||||
# session = self.get_raw_session()
|
||||
# knowledge_space = KnowledgeSpaceEntity(
|
||||
# name=space.name,
|
||||
# vector_type=CFG.VECTOR_STORE_TYPE,
|
||||
# desc=space.desc,
|
||||
# owner=space.owner,
|
||||
# gmt_created=datetime.now(),
|
||||
# gmt_modified=datetime.now(),
|
||||
# )
|
||||
# session.add(knowledge_space)
|
||||
# session.commit()
|
||||
# session.close()
|
||||
#
|
||||
# def get_knowledge_space(self, query: KnowledgeSpaceEntity):
|
||||
# session = self.get_raw_session()
|
||||
# knowledge_spaces = session.query(KnowledgeSpaceEntity)
|
||||
# if query.id is not None:
|
||||
# knowledge_spaces = knowledge_spaces.filter(
|
||||
# KnowledgeSpaceEntity.id == query.id
|
||||
# )
|
||||
# if query.name is not None:
|
||||
# knowledge_spaces = knowledge_spaces.filter(
|
||||
# KnowledgeSpaceEntity.name == query.name
|
||||
# )
|
||||
# if query.vector_type is not None:
|
||||
# knowledge_spaces = knowledge_spaces.filter(
|
||||
# KnowledgeSpaceEntity.vector_type == query.vector_type
|
||||
# )
|
||||
# if query.desc is not None:
|
||||
# knowledge_spaces = knowledge_spaces.filter(
|
||||
# KnowledgeSpaceEntity.desc == query.desc
|
||||
# )
|
||||
# if query.owner is not None:
|
||||
# knowledge_spaces = knowledge_spaces.filter(
|
||||
# KnowledgeSpaceEntity.owner == query.owner
|
||||
# )
|
||||
# if query.gmt_created is not None:
|
||||
# knowledge_spaces = knowledge_spaces.filter(
|
||||
# KnowledgeSpaceEntity.gmt_created == query.gmt_created
|
||||
# )
|
||||
# if query.gmt_modified is not None:
|
||||
# knowledge_spaces = knowledge_spaces.filter(
|
||||
# KnowledgeSpaceEntity.gmt_modified == query.gmt_modified
|
||||
# )
|
||||
#
|
||||
# knowledge_spaces = knowledge_spaces.order_by(
|
||||
# KnowledgeSpaceEntity.gmt_created.desc()
|
||||
# )
|
||||
# result = knowledge_spaces.all()
|
||||
# session.close()
|
||||
# return result
|
||||
#
|
||||
# def update_knowledge_space(self, space: KnowledgeSpaceEntity):
|
||||
# session = self.get_raw_session()
|
||||
# session.merge(space)
|
||||
# session.commit()
|
||||
# session.close()
|
||||
# return True
|
||||
#
|
||||
# def delete_knowledge_space(self, space: KnowledgeSpaceEntity):
|
||||
# session = self.get_raw_session()
|
||||
# if space:
|
||||
# session.delete(space)
|
||||
# session.commit()
|
||||
# session.close()
|
||||
|
Reference in New Issue
Block a user