feat: Add dbgpt client and add api v2

This commit is contained in:
Fangyin Cheng
2024-03-18 18:24:08 +08:00
parent 4970c9f813
commit 0ed30aa44a
39 changed files with 2663 additions and 143 deletions

View File

@@ -4,7 +4,7 @@ import shutil
import tempfile
from typing import List
from fastapi import APIRouter, File, Form, UploadFile
from fastapi import APIRouter, Depends, File, Form, UploadFile
from dbgpt._private.config import Config
from dbgpt.app.knowledge.request.request import (
@@ -16,7 +16,6 @@ from dbgpt.app.knowledge.request.request import (
KnowledgeDocumentRequest,
KnowledgeQueryRequest,
KnowledgeSpaceRequest,
KnowledgeSyncRequest,
SpaceArgumentRequest,
)
from dbgpt.app.knowledge.request.response import KnowledgeQueryResponse
@@ -31,6 +30,8 @@ from dbgpt.rag.embedding.embedding_factory import EmbeddingFactory
from dbgpt.rag.knowledge.base import ChunkStrategy
from dbgpt.rag.knowledge.factory import KnowledgeFactory
from dbgpt.rag.retriever.embedding import EmbeddingRetriever
from dbgpt.serve.rag.api.schemas import KnowledgeSyncRequest
from dbgpt.serve.rag.service.service import Service
from dbgpt.storage.vector_store.base import VectorStoreConfig
from dbgpt.storage.vector_store.connector import VectorStoreConnector
from dbgpt.util.tracer import SpanType, root_tracer
@@ -44,6 +45,11 @@ router = APIRouter()
knowledge_space_service = KnowledgeService()
def get_rag_service() -> Service:
    """Resolve the RAG service bound to the configured system app.

    Returns:
        Service: Whatever ``Service.get_instance`` yields for
        ``CFG.SYSTEM_APP``.
    """
    system_app = CFG.SYSTEM_APP
    return Service.get_instance(system_app)
@router.post("/knowledge/space/add")
def space_add(request: KnowledgeSpaceRequest):
print(f"/space/add params: {request}")
@@ -226,12 +232,20 @@ def document_sync(space_name: str, request: DocumentSyncRequest):
@router.post("/knowledge/{space_name}/document/sync_batch")
def batch_document_sync(
    space_name: str,
    request: List[KnowledgeSyncRequest],
    service: Service = Depends(get_rag_service),
):
    """Submit a batch of document sync requests for one knowledge space.

    Args:
        space_name: Name of the knowledge space, taken from the URL path.
        request: Sync requests; each one gets its ``space_id`` filled in
            here before the batch is handed to the service.
        service: RAG service, injected through ``get_rag_service``.

    Returns:
        ``Result.succ`` wrapping ``{"tasks": doc_ids}`` on success, or
        ``Result.failed`` with code ``E000X`` when the space cannot be
        resolved or anything raises.
    """
    logger.info(f"Received params: {space_name}, {request}")
    try:
        space = service.get({"name": space_name})
        # NOTE(review): assumes ``service.get`` yields None for an unknown
        # space -- confirm. Without this guard the failure surfaces as an
        # opaque "'NoneType' object has no attribute 'id'" message.
        if space is None:
            return Result.failed(
                code="E000X",
                msg=f"document sync error knowledge space {space_name} not found",
            )
        for sync_request in request:
            sync_request.space_id = space.id
        doc_ids = service.sync_document(requests=request)
        return Result.succ({"tasks": doc_ids})
    except Exception as e:
        return Result.failed(code="E000X", msg=f"document sync error {e}")

View File

@@ -1,9 +1,11 @@
from datetime import datetime
from typing import List
from typing import Any, Dict, List, Union
from sqlalchemy import Column, DateTime, Integer, String, Text, func
from dbgpt._private.config import Config
from dbgpt.serve.conversation.api.schemas import ServeRequest
from dbgpt.serve.rag.api.schemas import DocumentServeRequest, DocumentServeResponse
from dbgpt.storage.metadata import BaseDao, Model
CFG = Config()
@@ -218,3 +220,70 @@ class KnowledgeDocumentDao(BaseDao):
knowledge_documents.delete()
session.commit()
session.close()
def from_request(
    self, request: Union[ServeRequest, Dict[str, Any]]
) -> KnowledgeDocumentEntity:
    """Build a ``KnowledgeDocumentEntity`` from a request.

    A ``DocumentServeRequest`` is first flattened with ``.dict()``; any
    other value is unpacked as keyword arguments as-is.

    NOTE(review): the annotation names ``ServeRequest`` while the runtime
    check is against ``DocumentServeRequest`` -- confirm which is intended.

    Args:
        request (Union[ServeRequest, Dict[str, Any]]): The request

    Returns:
        KnowledgeDocumentEntity: The entity built from the request fields
    """
    if isinstance(request, DocumentServeRequest):
        fields = request.dict()
    else:
        fields = request
    return KnowledgeDocumentEntity(**fields)
def to_request(self, entity: KnowledgeDocumentEntity) -> DocumentServeResponse:
    """Convert the entity to its serve-layer representation.

    Despite the name, the value produced is a ``DocumentServeResponse``.
    The field mapping is the same one ``to_response`` performs, so this
    method delegates to it to keep a single copy of the field list.

    Args:
        entity (KnowledgeDocumentEntity): The entity

    Returns:
        DocumentServeResponse: The value returned by ``to_response``
    """
    return self.to_response(entity)
def to_response(self, entity: KnowledgeDocumentEntity) -> DocumentServeResponse:
    """Convert the entity to a response

    Args:
        entity (KnowledgeDocumentEntity): The entity

    Returns:
        DocumentServeResponse: A response carrying the entity's attributes
        listed below, copied one-to-one under the same names
    """
    # Attributes copied verbatim from the entity, in this order.
    copied_fields = (
        "id",
        "doc_name",
        "doc_type",
        "space",
        "chunk_size",
        "status",
        "last_sync",
        "content",
        "result",
        "vector_ids",
        "summary",
        "gmt_created",
        "gmt_modified",
    )
    payload = {name: getattr(entity, name) for name in copied_fields}
    return DocumentServeResponse(**payload)

View File

@@ -17,6 +17,8 @@ class KnowledgeQueryRequest(BaseModel):
class KnowledgeSpaceRequest(BaseModel):
"""name: knowledge space name"""
"""vector_type: vector type"""
id: int = None
name: str = None
"""vector_type: vector type"""
vector_type: str = None
@@ -37,9 +39,6 @@ class KnowledgeDocumentRequest(BaseModel):
"""content: content"""
source: str = None
"""text_chunk_size: text_chunk_size"""
# text_chunk_size: int
class DocumentQueryRequest(BaseModel):
"""doc_name: doc path"""
@@ -80,18 +79,18 @@ class DocumentSyncRequest(BaseModel):
chunk_overlap: Optional[int] = None
class KnowledgeSyncRequest(BaseModel):
    """Sync request"""

    # doc_id: id of the document (required).
    doc_id: int

    # model_name: model name (optional, defaults to None).
    model_name: Optional[str] = None

    # chunk_parameters: chunk parameters (required).
    chunk_parameters: ChunkParameters
# class KnowledgeSyncRequest(BaseModel):
# """Sync request"""
#
# """doc_ids: doc ids"""
# doc_id: int
#
# """model_name: model name"""
# model_name: Optional[str] = None
#
# """chunk_parameters: chunk parameters
# """
# chunk_parameters: ChunkParameters
class ChunkQueryRequest(BaseModel):

View File

@@ -1,7 +1,6 @@
import json
import logging
from datetime import datetime
from enum import Enum
from typing import List
from dbgpt._private.config import Config
@@ -17,7 +16,6 @@ from dbgpt.app.knowledge.request.request import (
DocumentSyncRequest,
KnowledgeDocumentRequest,
KnowledgeSpaceRequest,
KnowledgeSyncRequest,
SpaceArgumentRequest,
)
from dbgpt.app.knowledge.request.response import (
@@ -25,7 +23,6 @@ from dbgpt.app.knowledge.request.response import (
DocumentQueryResponse,
SpaceQueryResponse,
)
from dbgpt.app.knowledge.space_db import KnowledgeSpaceDao, KnowledgeSpaceEntity
from dbgpt.component import ComponentType
from dbgpt.configs.model_config import EMBEDDING_MODEL_CONFIG
from dbgpt.core import Chunk
@@ -38,8 +35,11 @@ from dbgpt.rag.text_splitter.text_splitter import (
RecursiveCharacterTextSplitter,
SpacyTextSplitter,
)
from dbgpt.serve.rag.api.schemas import KnowledgeSyncRequest
from dbgpt.serve.rag.assembler.embedding import EmbeddingAssembler
from dbgpt.serve.rag.assembler.summary import SummaryAssembler
from dbgpt.serve.rag.models.models import KnowledgeSpaceDao, KnowledgeSpaceEntity
from dbgpt.serve.rag.service.service import Service, SyncStatus
from dbgpt.storage.vector_store.base import VectorStoreConfig
from dbgpt.storage.vector_store.connector import VectorStoreConnector
from dbgpt.util.executor_utils import ExecutorFactory, blocking_func_to_async
@@ -53,13 +53,6 @@ logger = logging.getLogger(__name__)
CFG = Config()
# Enumeration of sync states; each member's value is the same string as its
# name.
# NOTE(review): presumably tracks knowledge document sync progress -- confirm
# against callers.
class SyncStatus(Enum):
TODO = "TODO"
FAILED = "FAILED"
RUNNING = "RUNNING"
FINISHED = "FINISHED"
# default summary max iteration call with llm.
DEFAULT_SUMMARY_MAX_ITERATION = 5
# default summary concurrency call with llm.
@@ -88,8 +81,8 @@ class KnowledgeService:
spaces = knowledge_space_dao.get_knowledge_space(query)
if len(spaces) > 0:
raise Exception(f"space name:{request.name} have already named")
knowledge_space_dao.create_knowledge_space(request)
return True
space_id = knowledge_space_dao.create_knowledge_space(request)
return space_id
def create_knowledge_document(self, space, request: KnowledgeDocumentRequest):
"""create knowledge document
@@ -199,7 +192,9 @@ class KnowledgeService:
return res
def batch_document_sync(
self, space_name, sync_requests: List[KnowledgeSyncRequest]
self,
space_name,
sync_requests: List[KnowledgeSyncRequest],
) -> List[int]:
"""batch sync knowledge document chunk into vector store
Args:

View File

@@ -1,93 +1,93 @@
from datetime import datetime
from sqlalchemy import Column, DateTime, Integer, String, Text
from dbgpt._private.config import Config
from dbgpt.app.knowledge.request.request import KnowledgeSpaceRequest
from dbgpt.storage.metadata import BaseDao, Model
CFG = Config()
class KnowledgeSpaceEntity(Model):
    """ORM mapping for rows of the ``knowledge_space`` table."""

    __tablename__ = "knowledge_space"
    id = Column(Integer, primary_key=True)
    name = Column(String(100))
    vector_type = Column(String(100))
    desc = Column(String(100))
    owner = Column(String(100))
    context = Column(Text)
    gmt_created = Column(DateTime)
    gmt_modified = Column(DateTime)

    def __repr__(self):
        """Return a debug string listing every mapped column."""
        # The original string lacked the comma between ``owner`` and
        # ``context``; it is restored here so all fields are delimited alike.
        return (
            f"KnowledgeSpaceEntity(id={self.id}, name='{self.name}', "
            f"vector_type='{self.vector_type}', desc='{self.desc}', "
            f"owner='{self.owner}', context='{self.context}', "
            f"gmt_created='{self.gmt_created}', "
            f"gmt_modified='{self.gmt_modified}')"
        )
class KnowledgeSpaceDao(BaseDao):
    """Data-access helper for ``KnowledgeSpaceEntity`` rows.

    Every method opens its own raw session and closes it in a ``finally``
    clause, so the session is released even when a query or commit raises.
    """

    def create_knowledge_space(self, space: KnowledgeSpaceRequest):
        """Insert a new knowledge space row.

        Args:
            space: Request supplying ``name``, ``desc`` and ``owner``. The
                vector type is taken from ``CFG.VECTOR_STORE_TYPE``, not
                from the request.

        Returns:
            The primary key of the newly inserted row.
        """
        session = self.get_raw_session()
        try:
            knowledge_space = KnowledgeSpaceEntity(
                name=space.name,
                vector_type=CFG.VECTOR_STORE_TYPE,
                desc=space.desc,
                owner=space.owner,
                gmt_created=datetime.now(),
                gmt_modified=datetime.now(),
            )
            session.add(knowledge_space)
            session.commit()
            # Read the id while the session is still open: the instance may
            # need a refresh from the database after the commit.
            return knowledge_space.id
        finally:
            session.close()

    def get_knowledge_space(self, query: KnowledgeSpaceEntity):
        """Return the spaces matching every non-None field of ``query``.

        Args:
            query: Entity used as a filter template. ``context`` is not
                used as a filter.

        Returns:
            List of matching entities, newest ``gmt_created`` first.
        """
        # Columns compared for equality, in the order the filters are applied.
        filter_fields = (
            "id",
            "name",
            "vector_type",
            "desc",
            "owner",
            "gmt_created",
            "gmt_modified",
        )
        session = self.get_raw_session()
        try:
            knowledge_spaces = session.query(KnowledgeSpaceEntity)
            for field in filter_fields:
                wanted = getattr(query, field)
                if wanted is not None:
                    knowledge_spaces = knowledge_spaces.filter(
                        getattr(KnowledgeSpaceEntity, field) == wanted
                    )
            knowledge_spaces = knowledge_spaces.order_by(
                KnowledgeSpaceEntity.gmt_created.desc()
            )
            return knowledge_spaces.all()
        finally:
            session.close()

    def update_knowledge_space(self, space: KnowledgeSpaceEntity):
        """Merge ``space`` into the database and commit.

        Returns:
            True once the commit has succeeded.
        """
        session = self.get_raw_session()
        try:
            session.merge(space)
            session.commit()
            return True
        finally:
            session.close()

    def delete_knowledge_space(self, space: KnowledgeSpaceEntity):
        """Delete ``space`` and commit; a falsy ``space`` is a no-op."""
        session = self.get_raw_session()
        try:
            if space:
                session.delete(space)
                session.commit()
        finally:
            session.close()
# from datetime import datetime
#
# from sqlalchemy import Column, DateTime, Integer, String, Text
#
# from dbgpt._private.config import Config
# from dbgpt.app.knowledge.request.request import KnowledgeSpaceRequest
# from dbgpt.storage.metadata import BaseDao, Model
#
# CFG = Config()
#
#
# class KnowledgeSpaceEntity(Model):
# __tablename__ = "knowledge_space"
# id = Column(Integer, primary_key=True)
# name = Column(String(100))
# vector_type = Column(String(100))
# desc = Column(String(100))
# owner = Column(String(100))
# context = Column(Text)
# gmt_created = Column(DateTime)
# gmt_modified = Column(DateTime)
#
# def __repr__(self):
# return f"KnowledgeSpaceEntity(id={self.id}, name='{self.name}', vector_type='{self.vector_type}', desc='{self.desc}', owner='{self.owner}' context='{self.context}', gmt_created='{self.gmt_created}', gmt_modified='{self.gmt_modified}')"
#
#
# class KnowledgeSpaceDao(BaseDao):
# def create_knowledge_space(self, space: KnowledgeSpaceRequest):
# session = self.get_raw_session()
# knowledge_space = KnowledgeSpaceEntity(
# name=space.name,
# vector_type=CFG.VECTOR_STORE_TYPE,
# desc=space.desc,
# owner=space.owner,
# gmt_created=datetime.now(),
# gmt_modified=datetime.now(),
# )
# session.add(knowledge_space)
# session.commit()
# session.close()
#
# def get_knowledge_space(self, query: KnowledgeSpaceEntity):
# session = self.get_raw_session()
# knowledge_spaces = session.query(KnowledgeSpaceEntity)
# if query.id is not None:
# knowledge_spaces = knowledge_spaces.filter(
# KnowledgeSpaceEntity.id == query.id
# )
# if query.name is not None:
# knowledge_spaces = knowledge_spaces.filter(
# KnowledgeSpaceEntity.name == query.name
# )
# if query.vector_type is not None:
# knowledge_spaces = knowledge_spaces.filter(
# KnowledgeSpaceEntity.vector_type == query.vector_type
# )
# if query.desc is not None:
# knowledge_spaces = knowledge_spaces.filter(
# KnowledgeSpaceEntity.desc == query.desc
# )
# if query.owner is not None:
# knowledge_spaces = knowledge_spaces.filter(
# KnowledgeSpaceEntity.owner == query.owner
# )
# if query.gmt_created is not None:
# knowledge_spaces = knowledge_spaces.filter(
# KnowledgeSpaceEntity.gmt_created == query.gmt_created
# )
# if query.gmt_modified is not None:
# knowledge_spaces = knowledge_spaces.filter(
# KnowledgeSpaceEntity.gmt_modified == query.gmt_modified
# )
#
# knowledge_spaces = knowledge_spaces.order_by(
# KnowledgeSpaceEntity.gmt_created.desc()
# )
# result = knowledge_spaces.all()
# session.close()
# return result
#
# def update_knowledge_space(self, space: KnowledgeSpaceEntity):
# session = self.get_raw_session()
# session.merge(space)
# session.commit()
# session.close()
# return True
#
# def delete_knowledge_space(self, space: KnowledgeSpaceEntity):
# session = self.get_raw_session()
# if space:
# session.delete(space)
# session.commit()
# session.close()