from datetime import datetime
from typing import List

from sqlalchemy import Column, String, DateTime, Integer, Text, func
from sqlalchemy.orm import declarative_base

from pilot.configs.config import Config
from pilot.connections.rdbms.base_dao import BaseDao

CFG = Config()

Base = declarative_base()


class DocumentChunkEntity(Base):
    """ORM entity for the document_chunk table: one chunk of a knowledge document."""

    __tablename__ = "document_chunk"
    id = Column(Integer, primary_key=True)
    document_id = Column(Integer)
    doc_name = Column(String(100))
    doc_type = Column(String(100))
    content = Column(Text)
    meta_info = Column(String(500))
    gmt_created = Column(DateTime)
    gmt_modified = Column(DateTime)

    def __repr__(self):
        return (
            f"DocumentChunkEntity(id={self.id}, doc_name='{self.doc_name}', "
            f"doc_type='{self.doc_type}', document_id='{self.document_id}', "
            f"content='{self.content}', meta_info='{self.meta_info}', "
            f"gmt_created='{self.gmt_created}', gmt_modified='{self.gmt_modified}')"
        )


class DocumentChunkDao(BaseDao):
    """Data access object for document chunks stored in the knowledge_management database."""

    def __init__(self):
        super().__init__(
            database="knowledge_management", orm_base=Base, create_not_exist_table=True
        )

    def create_documents_chunks(self, documents: List):
        """Persist a batch of chunks; each item must expose doc_name, doc_type,
        document_id, content and meta_info attributes."""
        session = self.Session()
        docs = [
            DocumentChunkEntity(
                doc_name=document.doc_name,
                doc_type=document.doc_type,
                document_id=document.document_id,
                content=document.content or "",
                meta_info=document.meta_info or "",
                gmt_created=datetime.now(),
                gmt_modified=datetime.now(),
            )
            for document in documents
        ]
        session.add_all(docs)
        session.commit()
        session.close()

    def get_document_chunks(self, query: DocumentChunkEntity, page=1, page_size=20):
        """Return one page of chunks matching the non-None fields of the query
        entity, newest first."""
        session = self.Session()
        document_chunks = session.query(DocumentChunkEntity)
        if query.id is not None:
            document_chunks = document_chunks.filter(DocumentChunkEntity.id == query.id)
        if query.document_id is not None:
            document_chunks = document_chunks.filter(
                DocumentChunkEntity.document_id == query.document_id
            )
        if query.doc_type is not None:
            document_chunks = document_chunks.filter(
                DocumentChunkEntity.doc_type == query.doc_type
            )
        if query.doc_name is not None:
            document_chunks = document_chunks.filter(
                DocumentChunkEntity.doc_name == query.doc_name
            )
        if query.meta_info is not None:
            document_chunks = document_chunks.filter(
                DocumentChunkEntity.meta_info == query.meta_info
            )

        document_chunks = document_chunks.order_by(DocumentChunkEntity.id.desc())
        document_chunks = document_chunks.offset((page - 1) * page_size).limit(
            page_size
        )
        result = document_chunks.all()
        session.close()
        return result

    def get_document_chunks_count(self, query: DocumentChunkEntity):
        """Count the chunks matching the non-None fields of the query entity."""
        session = self.Session()
        document_chunks = session.query(func.count(DocumentChunkEntity.id))
        if query.id is not None:
            document_chunks = document_chunks.filter(DocumentChunkEntity.id == query.id)
        if query.document_id is not None:
            document_chunks = document_chunks.filter(
                DocumentChunkEntity.document_id == query.document_id
            )
        if query.doc_type is not None:
            document_chunks = document_chunks.filter(
                DocumentChunkEntity.doc_type == query.doc_type
            )
        if query.doc_name is not None:
            document_chunks = document_chunks.filter(
                DocumentChunkEntity.doc_name == query.doc_name
            )
        if query.meta_info is not None:
            document_chunks = document_chunks.filter(
                DocumentChunkEntity.meta_info == query.meta_info
            )
        count = document_chunks.scalar()
        session.close()
        return count

    # def update_knowledge_document(self, document: KnowledgeDocumentEntity):
    #     session = self.Session()
    #     updated_space = session.merge(document)
    #     session.commit()
    #     return updated_space.id

    def delete(self, document_id: int):
        """Delete all chunks that belong to the given document."""
        if document_id is None:
            raise Exception("document_id is None")
        session = self.Session()
        chunks = session.query(DocumentChunkEntity).filter(
            DocumentChunkEntity.document_id == document_id
        )
        chunks.delete()
        session.commit()
        session.close()
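

# A minimal usage sketch, not part of the original module: it assumes the
# knowledge_management database configured for BaseDao is reachable, and it
# uses types.SimpleNamespace with illustrative field values as a stand-in for
# whatever chunk objects callers normally pass to create_documents_chunks.
if __name__ == "__main__":
    from types import SimpleNamespace

    dao = DocumentChunkDao()
    chunk = SimpleNamespace(
        doc_name="example.md",      # hypothetical document name
        doc_type="DOCUMENT",        # hypothetical doc_type value
        document_id=1,
        content="hello world",
        meta_info="{}",
    )
    dao.create_documents_chunks([chunk])

    # Queries take a partially populated entity; only non-None fields filter.
    query = DocumentChunkEntity(document_id=1)
    print(dao.get_document_chunks_count(query))
    for row in dao.get_document_chunks(query, page=1, page_size=10):
        print(row)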