mirror of
https://github.com/csunny/DB-GPT.git
synced 2026-01-29 21:49:35 +00:00
doc: update dbgpt_demo.mp4
1. update dbgpt_demo.mp4 2. format code
This commit is contained in:
@@ -6,7 +6,11 @@ from typing import List
|
||||
import markdown
|
||||
from bs4 import BeautifulSoup
|
||||
from langchain.schema import Document
|
||||
from langchain.text_splitter import SpacyTextSplitter, CharacterTextSplitter, RecursiveCharacterTextSplitter
|
||||
from langchain.text_splitter import (
|
||||
SpacyTextSplitter,
|
||||
CharacterTextSplitter,
|
||||
RecursiveCharacterTextSplitter,
|
||||
)
|
||||
|
||||
from pilot.configs.config import Config
|
||||
from pilot.embedding_engine import SourceEmbedding, register
|
||||
@@ -44,7 +48,9 @@ class MarkdownEmbedding(SourceEmbedding):
|
||||
chunk_overlap=100,
|
||||
)
|
||||
except Exception:
|
||||
text_splitter = RecursiveCharacterTextSplitter(chunk_size=CFG.KNOWLEDGE_CHUNK_SIZE, chunk_overlap=50)
|
||||
text_splitter = RecursiveCharacterTextSplitter(
|
||||
chunk_size=CFG.KNOWLEDGE_CHUNK_SIZE, chunk_overlap=50
|
||||
)
|
||||
return loader.load_and_split(text_splitter)
|
||||
|
||||
@register
|
||||
|
||||
@@ -47,7 +47,9 @@ class PDFEmbedding(SourceEmbedding):
|
||||
chunk_overlap=100,
|
||||
)
|
||||
except Exception:
|
||||
text_splitter = RecursiveCharacterTextSplitter(chunk_size=CFG.KNOWLEDGE_CHUNK_SIZE, chunk_overlap=50)
|
||||
text_splitter = RecursiveCharacterTextSplitter(
|
||||
chunk_size=CFG.KNOWLEDGE_CHUNK_SIZE, chunk_overlap=50
|
||||
)
|
||||
return loader.load_and_split(text_splitter)
|
||||
|
||||
@register
|
||||
|
||||
@@ -4,7 +4,7 @@ from typing import List
|
||||
|
||||
from langchain.document_loaders import UnstructuredPowerPointLoader
|
||||
from langchain.schema import Document
|
||||
from langchain.text_splitter import SpacyTextSplitter, RecursiveCharacterTextSplitter
|
||||
from langchain.text_splitter import SpacyTextSplitter, RecursiveCharacterTextSplitter
|
||||
|
||||
from pilot.configs.config import Config
|
||||
from pilot.embedding_engine import SourceEmbedding, register
|
||||
@@ -45,7 +45,9 @@ class PPTEmbedding(SourceEmbedding):
|
||||
chunk_overlap=100,
|
||||
)
|
||||
except Exception:
|
||||
text_splitter = RecursiveCharacterTextSplitter(chunk_size=CFG.KNOWLEDGE_CHUNK_SIZE, chunk_overlap=50)
|
||||
text_splitter = RecursiveCharacterTextSplitter(
|
||||
chunk_size=CFG.KNOWLEDGE_CHUNK_SIZE, chunk_overlap=50
|
||||
)
|
||||
return loader.load_and_split(text_splitter)
|
||||
|
||||
@register
|
||||
|
||||
@@ -40,7 +40,9 @@ class URLEmbedding(SourceEmbedding):
|
||||
chunk_overlap=100,
|
||||
)
|
||||
except Exception:
|
||||
text_splitter = RecursiveCharacterTextSplitter(chunk_size=CFG.KNOWLEDGE_CHUNK_SIZE, chunk_overlap=50)
|
||||
text_splitter = RecursiveCharacterTextSplitter(
|
||||
chunk_size=CFG.KNOWLEDGE_CHUNK_SIZE, chunk_overlap=50
|
||||
)
|
||||
return loader.load_and_split(text_splitter)
|
||||
|
||||
@register
|
||||
|
||||
@@ -39,7 +39,9 @@ class WordEmbedding(SourceEmbedding):
|
||||
chunk_overlap=100,
|
||||
)
|
||||
except Exception:
|
||||
text_splitter = RecursiveCharacterTextSplitter(chunk_size=CFG.KNOWLEDGE_CHUNK_SIZE, chunk_overlap=50)
|
||||
text_splitter = RecursiveCharacterTextSplitter(
|
||||
chunk_size=CFG.KNOWLEDGE_CHUNK_SIZE, chunk_overlap=50
|
||||
)
|
||||
return loader.load_and_split(text_splitter)
|
||||
|
||||
@register
|
||||
|
||||
Reference in New Issue
Block a user