docs: v0.3.1 docs

1. fmt
2. docs
This commit is contained in:
aries_ckt
2023-07-13 19:23:03 +08:00
parent 4e33e6ec2e
commit b5f3e079db
19 changed files with 374 additions and 179 deletions

View File

@@ -344,7 +344,14 @@ class Database:
return [
d[0]
for d in results
if d[0] not in ["information_schema", "performance_schema", "sys", "mysql", "knowledge_management"]
if d[0]
not in [
"information_schema",
"performance_schema",
"sys",
"mysql",
"knowledge_management",
]
]
def convert_sql_write_to_select(self, write_sql):
@@ -421,7 +428,13 @@ class Database:
session = self._db_sessions()
cursor = session.execute(text(f"SHOW CREATE TABLE {table_name}"))
ans = cursor.fetchall()
return ans[0][1]
res = ans[0][1]
res = re.sub(r"\s*ENGINE\s*=\s*InnoDB\s*", " ", res, flags=re.IGNORECASE)
res = re.sub(
r"\s*DEFAULT\s*CHARSET\s*=\s*\w+\s*", " ", res, flags=re.IGNORECASE
)
res = re.sub(r"\s*COLLATE\s*=\s*\w+\s*", " ", res, flags=re.IGNORECASE)
return res
def get_fields(self, table_name):
"""Get column fields about specified table."""

View File

@@ -2,7 +2,11 @@ from typing import Dict, List, Optional
from langchain.document_loaders import CSVLoader
from langchain.schema import Document
from langchain.text_splitter import TextSplitter, SpacyTextSplitter, RecursiveCharacterTextSplitter
from langchain.text_splitter import (
TextSplitter,
SpacyTextSplitter,
RecursiveCharacterTextSplitter,
)
from pilot.embedding_engine import SourceEmbedding, register
@@ -18,7 +22,9 @@ class CSVEmbedding(SourceEmbedding):
text_splitter: Optional[TextSplitter] = None,
):
"""Initialize with csv path."""
super().__init__(file_path, vector_store_config, source_reader=None, text_splitter=None)
super().__init__(
file_path, vector_store_config, source_reader=None, text_splitter=None
)
self.file_path = file_path
self.vector_store_config = vector_store_config
self.source_reader = source_reader or None

View File

@@ -28,7 +28,9 @@ class MarkdownEmbedding(SourceEmbedding):
text_splitter: Optional[TextSplitter] = None,
):
"""Initialize raw text word path."""
super().__init__(file_path, vector_store_config, source_reader=None, text_splitter=None)
super().__init__(
file_path, vector_store_config, source_reader=None, text_splitter=None
)
self.file_path = file_path
self.vector_store_config = vector_store_config
self.source_reader = source_reader or None

View File

@@ -24,7 +24,9 @@ class PDFEmbedding(SourceEmbedding):
text_splitter: Optional[TextSplitter] = None,
):
"""Initialize pdf word path."""
super().__init__(file_path, vector_store_config, source_reader=None, text_splitter=None)
super().__init__(
file_path, vector_store_config, source_reader=None, text_splitter=None
)
self.file_path = file_path
self.vector_store_config = vector_store_config
self.source_reader = source_reader or None

View File

@@ -24,7 +24,9 @@ class PPTEmbedding(SourceEmbedding):
text_splitter: Optional[TextSplitter] = None,
):
"""Initialize ppt word path."""
super().__init__(file_path, vector_store_config, source_reader=None, text_splitter=None)
super().__init__(
file_path, vector_store_config, source_reader=None, text_splitter=None
)
self.file_path = file_path
self.vector_store_config = vector_store_config
self.source_reader = source_reader or None

View File

@@ -1,7 +1,11 @@
from typing import List, Optional
from langchain.schema import Document
from langchain.text_splitter import TextSplitter, SpacyTextSplitter, RecursiveCharacterTextSplitter
from langchain.text_splitter import (
TextSplitter,
SpacyTextSplitter,
RecursiveCharacterTextSplitter,
)
from pilot.embedding_engine import SourceEmbedding, register
@@ -17,7 +21,12 @@ class StringEmbedding(SourceEmbedding):
text_splitter: Optional[TextSplitter] = None,
):
"""Initialize raw text word path."""
super().__init__(file_path=file_path, vector_store_config=vector_store_config, source_reader=None, text_splitter=None)
super().__init__(
file_path=file_path,
vector_store_config=vector_store_config,
source_reader=None,
text_splitter=None,
)
self.file_path = file_path
self.vector_store_config = vector_store_config
self.source_reader = source_reader or None
@@ -32,16 +41,15 @@ class StringEmbedding(SourceEmbedding):
try:
self.text_splitter = SpacyTextSplitter(
pipeline="zh_core_web_sm",
chunk_size=100,
chunk_size=500,
chunk_overlap=100,
)
except Exception:
self.text_splitter = RecursiveCharacterTextSplitter(
chunk_size=100, chunk_overlap=50
)
return self.text_splitter.split_documents(docs)
return self.text_splitter.split_documents(docs)
return docs
@register
def data_process(self, documents: List[Document]):

View File

@@ -23,13 +23,14 @@ class URLEmbedding(SourceEmbedding):
text_splitter: Optional[TextSplitter] = None,
):
"""Initialize url word path."""
super().__init__(file_path, vector_store_config, source_reader=None, text_splitter=None)
super().__init__(
file_path, vector_store_config, source_reader=None, text_splitter=None
)
self.file_path = file_path
self.vector_store_config = vector_store_config
self.source_reader = source_reader or None
self.text_splitter = text_splitter or None
@register
def read(self):
"""Load from url path."""

View File

@@ -24,7 +24,9 @@ class WordEmbedding(SourceEmbedding):
text_splitter: Optional[TextSplitter] = None,
):
"""Initialize with word path."""
super().__init__(file_path, vector_store_config, source_reader=None, text_splitter=None)
super().__init__(
file_path, vector_store_config, source_reader=None, text_splitter=None
)
self.file_path = file_path
self.vector_store_config = vector_store_config
self.source_reader = source_reader or None

View File

@@ -77,7 +77,6 @@ class DBSummaryClient:
def get_db_summary(self, dbname, query, topk):
vector_store_config = {
"vector_store_name": dbname + "_profile",
"chroma_persist_path": KNOWLEDGE_UPLOAD_ROOT_PATH,
"vector_store_type": CFG.VECTOR_STORE_TYPE,
"chroma_persist_path": KNOWLEDGE_UPLOAD_ROOT_PATH,
}