feat(core): Upgrade pydantic to 2.x (#1428)

This commit is contained in:
Fangyin Cheng
2024-04-20 09:41:16 +08:00
committed by GitHub
parent baa1e3f9f6
commit 57be1ece18
103 changed files with 1146 additions and 534 deletions

View File

@@ -4,6 +4,7 @@ from typing import List
from sqlalchemy import Column, DateTime, Integer, String, Text, func
from dbgpt._private.config import Config
from dbgpt.serve.rag.api.schemas import DocumentChunkVO
from dbgpt.storage.metadata import BaseDao, Model
CFG = Config()
@@ -23,6 +24,22 @@ class DocumentChunkEntity(Model):
def __repr__(self):
return f"DocumentChunkEntity(id={self.id}, doc_name='{self.doc_name}', doc_type='{self.doc_type}', document_id='{self.document_id}', content='{self.content}', meta_info='{self.meta_info}', gmt_created='{self.gmt_created}', gmt_modified='{self.gmt_modified}')"
@classmethod
def to_to_document_chunk_vo(cls, entity_list: List["DocumentChunkEntity"]):
    """Convert DocumentChunkEntity rows into DocumentChunkVO objects.

    Datetime columns (``gmt_created``/``gmt_modified``) are rendered as
    ``"%Y-%m-%d %H:%M:%S"`` strings; all other columns are copied as-is.

    Args:
        entity_list: The entity rows to convert.

    Returns:
        A list of ``DocumentChunkVO`` in the same order as ``entity_list``.
    """
    time_fmt = "%Y-%m-%d %H:%M:%S"
    vo_list = []
    for chunk in entity_list:
        vo_list.append(
            DocumentChunkVO(
                id=chunk.id,
                document_id=chunk.document_id,
                doc_name=chunk.doc_name,
                doc_type=chunk.doc_type,
                content=chunk.content,
                meta_info=chunk.meta_info,
                gmt_created=chunk.gmt_created.strftime(time_fmt),
                gmt_modified=chunk.gmt_modified.strftime(time_fmt),
            )
        )
    return vo_list
class DocumentChunkDao(BaseDao):
def create_documents_chunks(self, documents: List):
@@ -45,7 +62,7 @@ class DocumentChunkDao(BaseDao):
def get_document_chunks(
self, query: DocumentChunkEntity, page=1, page_size=20, document_ids=None
):
) -> List[DocumentChunkVO]:
session = self.get_raw_session()
document_chunks = session.query(DocumentChunkEntity)
if query.id is not None:
@@ -81,7 +98,7 @@ class DocumentChunkDao(BaseDao):
)
result = document_chunks.all()
session.close()
return result
return DocumentChunkEntity.to_to_document_chunk_vo(result)
def get_document_chunks_count(self, query: DocumentChunkEntity):
session = self.get_raw_session()

View File

@@ -4,8 +4,13 @@ from typing import Any, Dict, List, Union
from sqlalchemy import Column, DateTime, Integer, String, Text, func
from dbgpt._private.config import Config
from dbgpt._private.pydantic import model_to_dict
from dbgpt.serve.conversation.api.schemas import ServeRequest
from dbgpt.serve.rag.api.schemas import DocumentServeRequest, DocumentServeResponse
from dbgpt.serve.rag.api.schemas import (
DocumentServeRequest,
DocumentServeResponse,
DocumentVO,
)
from dbgpt.storage.metadata import BaseDao, Model
CFG = Config()
@@ -30,6 +35,55 @@ class KnowledgeDocumentEntity(Model):
def __repr__(self):
return f"KnowledgeDocumentEntity(id={self.id}, doc_name='{self.doc_name}', doc_type='{self.doc_type}', chunk_size='{self.chunk_size}', status='{self.status}', last_sync='{self.last_sync}', content='{self.content}', result='{self.result}', summary='{self.summary}', gmt_created='{self.gmt_created}', gmt_modified='{self.gmt_modified}')"
@classmethod
def to_document_vo(
    cls, entity_list: List["KnowledgeDocumentEntity"]
) -> List[DocumentVO]:
    """Convert KnowledgeDocumentEntity rows into DocumentVO objects.

    Datetime columns (``last_sync``/``gmt_created``/``gmt_modified``) are
    serialized as ``"%Y-%m-%d %H:%M:%S"`` strings; other columns copy over
    unchanged.

    Args:
        entity_list: The entity rows to convert.

    Returns:
        A list of ``DocumentVO`` in the same order as ``entity_list``.
    """
    time_fmt = "%Y-%m-%d %H:%M:%S"
    return [
        DocumentVO(
            id=doc.id,
            doc_name=doc.doc_name,
            doc_type=doc.doc_type,
            space=doc.space,
            chunk_size=doc.chunk_size,
            status=doc.status,
            last_sync=doc.last_sync.strftime(time_fmt),
            content=doc.content,
            result=doc.result,
            vector_ids=doc.vector_ids,
            summary=doc.summary,
            gmt_created=doc.gmt_created.strftime(time_fmt),
            gmt_modified=doc.gmt_modified.strftime(time_fmt),
        )
        for doc in entity_list
    ]
@classmethod
def from_document_vo(cls, vo: DocumentVO) -> "KnowledgeDocumentEntity":
    """Build a KnowledgeDocumentEntity from a DocumentVO.

    The VO's string timestamps (``last_sync``/``gmt_created``/``gmt_modified``)
    are parsed back into datetimes with the ``"%Y-%m-%d %H:%M:%S"`` format,
    but only when they are truthy — unset timestamps are left to the entity's
    column defaults.

    Args:
        vo: The view object to convert.

    Returns:
        A new ``KnowledgeDocumentEntity`` populated from ``vo``.
    """
    entity = KnowledgeDocumentEntity(
        id=vo.id,
        doc_name=vo.doc_name,
        doc_type=vo.doc_type,
        space=vo.space,
        chunk_size=vo.chunk_size,
        status=vo.status,
        content=vo.content,
        result=vo.result,
        vector_ids=vo.vector_ids,
        summary=vo.summary,
    )
    time_fmt = "%Y-%m-%d %H:%M:%S"
    # Parse each optional string timestamp back onto the entity.
    for attr_name, raw_value in (
        ("last_sync", vo.last_sync),
        ("gmt_created", vo.gmt_created),
        ("gmt_modified", vo.gmt_modified),
    ):
        if raw_value:
            setattr(entity, attr_name, datetime.strptime(raw_value, time_fmt))
    return entity
class KnowledgeDocumentDao(BaseDao):
def create_knowledge_document(self, document: KnowledgeDocumentEntity):
@@ -53,7 +107,7 @@ class KnowledgeDocumentDao(BaseDao):
session.close()
return doc_id
def get_knowledge_documents(self, query, page=1, page_size=20):
def get_knowledge_documents(self, query, page=1, page_size=20) -> List[DocumentVO]:
"""Get a list of documents that match the given query.
Args:
query: A KnowledgeDocumentEntity object containing the query parameters.
@@ -92,9 +146,9 @@ class KnowledgeDocumentDao(BaseDao):
)
result = knowledge_documents.all()
session.close()
return result
return KnowledgeDocumentEntity.to_document_vo(result)
def documents_by_ids(self, ids) -> List[KnowledgeDocumentEntity]:
def documents_by_ids(self, ids) -> List[DocumentVO]:
"""Get a list of documents by their IDs.
Args:
ids: A list of document IDs.
@@ -109,7 +163,7 @@ class KnowledgeDocumentDao(BaseDao):
)
result = knowledge_documents.all()
session.close()
return result
return KnowledgeDocumentEntity.to_document_vo(result)
def get_documents(self, query):
session = self.get_raw_session()
@@ -233,7 +287,9 @@ class KnowledgeDocumentDao(BaseDao):
T: The entity
"""
request_dict = (
request.dict() if isinstance(request, DocumentServeRequest) else request
model_to_dict(request)
if isinstance(request, DocumentServeRequest)
else request
)
entity = KnowledgeDocumentEntity(**request_dict)
return entity

View File

@@ -1,9 +1,6 @@
from typing import List, Optional
from fastapi import UploadFile
from dbgpt._private.pydantic import BaseModel
from dbgpt.rag.chunk_manager import ChunkParameters
from dbgpt._private.pydantic import BaseModel, ConfigDict
class KnowledgeQueryRequest(BaseModel):
@@ -59,6 +56,8 @@ class DocumentQueryRequest(BaseModel):
class DocumentSyncRequest(BaseModel):
"""Sync request"""
model_config = ConfigDict(protected_namespaces=())
"""doc_ids: doc ids"""
doc_ids: List
@@ -104,6 +103,8 @@ class SpaceArgumentRequest(BaseModel):
class DocumentSummaryRequest(BaseModel):
"""Sync request"""
model_config = ConfigDict(protected_namespaces=())
"""doc_ids: doc ids"""
doc_id: int
model_name: str
@@ -113,5 +114,7 @@ class DocumentSummaryRequest(BaseModel):
class EntityExtractRequest(BaseModel):
"""argument: argument"""
model_config = ConfigDict(protected_namespaces=())
text: str
model_name: str

View File

@@ -1,28 +1,29 @@
from typing import List
from typing import List, Optional
from dbgpt._private.pydantic import BaseModel
from dbgpt._private.pydantic import BaseModel, Field
from dbgpt.serve.rag.api.schemas import DocumentChunkVO, DocumentVO
class ChunkQueryResponse(BaseModel):
"""data: data"""
data: List = None
data: List[DocumentChunkVO] = Field(..., description="document chunk list")
"""summary: document summary"""
summary: str = None
summary: Optional[str] = Field(None, description="document summary")
"""total: total size"""
total: int = None
total: Optional[int] = Field(None, description="total size")
"""page: current page"""
page: int = None
page: Optional[int] = Field(None, description="current page")
class DocumentQueryResponse(BaseModel):
"""data: data"""
data: List = None
data: List[DocumentVO] = Field(..., description="document list")
"""total: total size"""
total: int = None
total: Optional[int] = Field(None, description="total size")
"""page: current page"""
page: int = None
page: Optional[int] = Field(None, description="current page")
class SpaceQueryResponse(BaseModel):

View File

@@ -174,9 +174,11 @@ class KnowledgeService:
Returns:
- res DocumentQueryResponse
"""
res = DocumentQueryResponse()
total = None
page = request.page
if request.doc_ids and len(request.doc_ids) > 0:
res.data = knowledge_document_dao.documents_by_ids(request.doc_ids)
data = knowledge_document_dao.documents_by_ids(request.doc_ids)
else:
query = KnowledgeDocumentEntity(
doc_name=request.doc_name,
@@ -184,12 +186,11 @@ class KnowledgeService:
space=space,
status=request.status,
)
res.data = knowledge_document_dao.get_knowledge_documents(
data = knowledge_document_dao.get_knowledge_documents(
query, page=request.page, page_size=request.page_size
)
res.total = knowledge_document_dao.get_knowledge_documents_count(query)
res.page = request.page
return res
total = knowledge_document_dao.get_knowledge_documents_count(query)
return DocumentQueryResponse(data=data, total=total, page=page)
def batch_document_sync(
self,
@@ -505,13 +506,15 @@ class KnowledgeService:
document_query = KnowledgeDocumentEntity(id=request.document_id)
documents = knowledge_document_dao.get_documents(document_query)
res = ChunkQueryResponse()
res.data = document_chunk_dao.get_document_chunks(
data = document_chunk_dao.get_document_chunks(
query, page=request.page, page_size=request.page_size
)
res.summary = documents[0].summary
res.total = document_chunk_dao.get_document_chunks_count(query)
res.page = request.page
res = ChunkQueryResponse(
data=data,
summary=documents[0].summary,
total=document_chunk_dao.get_document_chunks_count(query),
page=request.page,
)
return res
@trace("async_doc_embedding")