mirror of
https://github.com/csunny/DB-GPT.git
synced 2025-09-16 06:30:02 +00:00
docs:v0.3.1 docs
1.fmt 2.docs
This commit is contained in:
@@ -344,7 +344,14 @@ class Database:
|
||||
return [
|
||||
d[0]
|
||||
for d in results
|
||||
if d[0] not in ["information_schema", "performance_schema", "sys", "mysql", "knowledge_management"]
|
||||
if d[0]
|
||||
not in [
|
||||
"information_schema",
|
||||
"performance_schema",
|
||||
"sys",
|
||||
"mysql",
|
||||
"knowledge_management",
|
||||
]
|
||||
]
|
||||
|
||||
def convert_sql_write_to_select(self, write_sql):
|
||||
@@ -421,7 +428,13 @@ class Database:
|
||||
session = self._db_sessions()
|
||||
cursor = session.execute(text(f"SHOW CREATE TABLE {table_name}"))
|
||||
ans = cursor.fetchall()
|
||||
return ans[0][1]
|
||||
res = ans[0][1]
|
||||
res = re.sub(r"\s*ENGINE\s*=\s*InnoDB\s*", " ", res, flags=re.IGNORECASE)
|
||||
res = re.sub(
|
||||
r"\s*DEFAULT\s*CHARSET\s*=\s*\w+\s*", " ", res, flags=re.IGNORECASE
|
||||
)
|
||||
res = re.sub(r"\s*COLLATE\s*=\s*\w+\s*", " ", res, flags=re.IGNORECASE)
|
||||
return res
|
||||
|
||||
def get_fields(self, table_name):
|
||||
"""Get column fields about specified table."""
|
||||
|
@@ -2,7 +2,11 @@ from typing import Dict, List, Optional
|
||||
|
||||
from langchain.document_loaders import CSVLoader
|
||||
from langchain.schema import Document
|
||||
from langchain.text_splitter import TextSplitter, SpacyTextSplitter, RecursiveCharacterTextSplitter
|
||||
from langchain.text_splitter import (
|
||||
TextSplitter,
|
||||
SpacyTextSplitter,
|
||||
RecursiveCharacterTextSplitter,
|
||||
)
|
||||
|
||||
from pilot.embedding_engine import SourceEmbedding, register
|
||||
|
||||
@@ -18,7 +22,9 @@ class CSVEmbedding(SourceEmbedding):
|
||||
text_splitter: Optional[TextSplitter] = None,
|
||||
):
|
||||
"""Initialize with csv path."""
|
||||
super().__init__(file_path, vector_store_config, source_reader=None, text_splitter=None)
|
||||
super().__init__(
|
||||
file_path, vector_store_config, source_reader=None, text_splitter=None
|
||||
)
|
||||
self.file_path = file_path
|
||||
self.vector_store_config = vector_store_config
|
||||
self.source_reader = source_reader or None
|
||||
|
@@ -28,7 +28,9 @@ class MarkdownEmbedding(SourceEmbedding):
|
||||
text_splitter: Optional[TextSplitter] = None,
|
||||
):
|
||||
"""Initialize raw text word path."""
|
||||
super().__init__(file_path, vector_store_config, source_reader=None, text_splitter=None)
|
||||
super().__init__(
|
||||
file_path, vector_store_config, source_reader=None, text_splitter=None
|
||||
)
|
||||
self.file_path = file_path
|
||||
self.vector_store_config = vector_store_config
|
||||
self.source_reader = source_reader or None
|
||||
|
@@ -24,7 +24,9 @@ class PDFEmbedding(SourceEmbedding):
|
||||
text_splitter: Optional[TextSplitter] = None,
|
||||
):
|
||||
"""Initialize pdf word path."""
|
||||
super().__init__(file_path, vector_store_config, source_reader=None, text_splitter=None)
|
||||
super().__init__(
|
||||
file_path, vector_store_config, source_reader=None, text_splitter=None
|
||||
)
|
||||
self.file_path = file_path
|
||||
self.vector_store_config = vector_store_config
|
||||
self.source_reader = source_reader or None
|
||||
|
@@ -24,7 +24,9 @@ class PPTEmbedding(SourceEmbedding):
|
||||
text_splitter: Optional[TextSplitter] = None,
|
||||
):
|
||||
"""Initialize ppt word path."""
|
||||
super().__init__(file_path, vector_store_config, source_reader=None, text_splitter=None)
|
||||
super().__init__(
|
||||
file_path, vector_store_config, source_reader=None, text_splitter=None
|
||||
)
|
||||
self.file_path = file_path
|
||||
self.vector_store_config = vector_store_config
|
||||
self.source_reader = source_reader or None
|
||||
|
@@ -1,7 +1,11 @@
|
||||
from typing import List, Optional
|
||||
|
||||
from langchain.schema import Document
|
||||
from langchain.text_splitter import TextSplitter, SpacyTextSplitter, RecursiveCharacterTextSplitter
|
||||
from langchain.text_splitter import (
|
||||
TextSplitter,
|
||||
SpacyTextSplitter,
|
||||
RecursiveCharacterTextSplitter,
|
||||
)
|
||||
|
||||
from pilot.embedding_engine import SourceEmbedding, register
|
||||
|
||||
@@ -17,7 +21,12 @@ class StringEmbedding(SourceEmbedding):
|
||||
text_splitter: Optional[TextSplitter] = None,
|
||||
):
|
||||
"""Initialize raw text word path."""
|
||||
super().__init__(file_path=file_path, vector_store_config=vector_store_config, source_reader=None, text_splitter=None)
|
||||
super().__init__(
|
||||
file_path=file_path,
|
||||
vector_store_config=vector_store_config,
|
||||
source_reader=None,
|
||||
text_splitter=None,
|
||||
)
|
||||
self.file_path = file_path
|
||||
self.vector_store_config = vector_store_config
|
||||
self.source_reader = source_reader or None
|
||||
@@ -32,16 +41,15 @@ class StringEmbedding(SourceEmbedding):
|
||||
try:
|
||||
self.text_splitter = SpacyTextSplitter(
|
||||
pipeline="zh_core_web_sm",
|
||||
chunk_size=100,
|
||||
chunk_size=500,
|
||||
chunk_overlap=100,
|
||||
)
|
||||
except Exception:
|
||||
self.text_splitter = RecursiveCharacterTextSplitter(
|
||||
chunk_size=100, chunk_overlap=50
|
||||
)
|
||||
|
||||
return self.text_splitter.split_documents(docs)
|
||||
|
||||
return self.text_splitter.split_documents(docs)
|
||||
return docs
|
||||
|
||||
@register
|
||||
def data_process(self, documents: List[Document]):
|
||||
|
@@ -23,13 +23,14 @@ class URLEmbedding(SourceEmbedding):
|
||||
text_splitter: Optional[TextSplitter] = None,
|
||||
):
|
||||
"""Initialize url word path."""
|
||||
super().__init__(file_path, vector_store_config, source_reader=None, text_splitter=None)
|
||||
super().__init__(
|
||||
file_path, vector_store_config, source_reader=None, text_splitter=None
|
||||
)
|
||||
self.file_path = file_path
|
||||
self.vector_store_config = vector_store_config
|
||||
self.source_reader = source_reader or None
|
||||
self.text_splitter = text_splitter or None
|
||||
|
||||
|
||||
@register
|
||||
def read(self):
|
||||
"""Load from url path."""
|
||||
|
@@ -24,7 +24,9 @@ class WordEmbedding(SourceEmbedding):
|
||||
text_splitter: Optional[TextSplitter] = None,
|
||||
):
|
||||
"""Initialize with word path."""
|
||||
super().__init__(file_path, vector_store_config, source_reader=None, text_splitter=None)
|
||||
super().__init__(
|
||||
file_path, vector_store_config, source_reader=None, text_splitter=None
|
||||
)
|
||||
self.file_path = file_path
|
||||
self.vector_store_config = vector_store_config
|
||||
self.source_reader = source_reader or None
|
||||
|
@@ -77,7 +77,6 @@ class DBSummaryClient:
|
||||
def get_db_summary(self, dbname, query, topk):
|
||||
vector_store_config = {
|
||||
"vector_store_name": dbname + "_profile",
|
||||
"chroma_persist_path": KNOWLEDGE_UPLOAD_ROOT_PATH,
|
||||
"vector_store_type": CFG.VECTOR_STORE_TYPE,
|
||||
"chroma_persist_path": KNOWLEDGE_UPLOAD_ROOT_PATH,
|
||||
}
|
||||
|
Reference in New Issue
Block a user