doc: update dbgpt_demo.mp4

1. Update dbgpt_demo.mp4
2. Format code
This commit is contained in:
aries_ckt
2023-07-06 13:47:46 +08:00
parent 47595aa10f
commit eb31d5523e
31 changed files with 243 additions and 128 deletions

View File

@@ -6,7 +6,11 @@ from typing import List
import markdown
from bs4 import BeautifulSoup
from langchain.schema import Document
from langchain.text_splitter import SpacyTextSplitter, CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.text_splitter import (
SpacyTextSplitter,
CharacterTextSplitter,
RecursiveCharacterTextSplitter,
)
from pilot.configs.config import Config
from pilot.embedding_engine import SourceEmbedding, register
@@ -44,7 +48,9 @@ class MarkdownEmbedding(SourceEmbedding):
chunk_overlap=100,
)
except Exception:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=CFG.KNOWLEDGE_CHUNK_SIZE, chunk_overlap=50)
text_splitter = RecursiveCharacterTextSplitter(
chunk_size=CFG.KNOWLEDGE_CHUNK_SIZE, chunk_overlap=50
)
return loader.load_and_split(text_splitter)
@register

View File

@@ -47,7 +47,9 @@ class PDFEmbedding(SourceEmbedding):
chunk_overlap=100,
)
except Exception:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=CFG.KNOWLEDGE_CHUNK_SIZE, chunk_overlap=50)
text_splitter = RecursiveCharacterTextSplitter(
chunk_size=CFG.KNOWLEDGE_CHUNK_SIZE, chunk_overlap=50
)
return loader.load_and_split(text_splitter)
@register

View File

@@ -4,7 +4,7 @@ from typing import List
from langchain.document_loaders import UnstructuredPowerPointLoader
from langchain.schema import Document
from langchain.text_splitter import SpacyTextSplitter, RecursiveCharacterTextSplitter
from langchain.text_splitter import SpacyTextSplitter, RecursiveCharacterTextSplitter
from pilot.configs.config import Config
from pilot.embedding_engine import SourceEmbedding, register
@@ -45,7 +45,9 @@ class PPTEmbedding(SourceEmbedding):
chunk_overlap=100,
)
except Exception:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=CFG.KNOWLEDGE_CHUNK_SIZE, chunk_overlap=50)
text_splitter = RecursiveCharacterTextSplitter(
chunk_size=CFG.KNOWLEDGE_CHUNK_SIZE, chunk_overlap=50
)
return loader.load_and_split(text_splitter)
@register

View File

@@ -40,7 +40,9 @@ class URLEmbedding(SourceEmbedding):
chunk_overlap=100,
)
except Exception:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=CFG.KNOWLEDGE_CHUNK_SIZE, chunk_overlap=50)
text_splitter = RecursiveCharacterTextSplitter(
chunk_size=CFG.KNOWLEDGE_CHUNK_SIZE, chunk_overlap=50
)
return loader.load_and_split(text_splitter)
@register

View File

@@ -39,7 +39,9 @@ class WordEmbedding(SourceEmbedding):
chunk_overlap=100,
)
except Exception:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=CFG.KNOWLEDGE_CHUNK_SIZE, chunk_overlap=50)
text_splitter = RecursiveCharacterTextSplitter(
chunk_size=CFG.KNOWLEDGE_CHUNK_SIZE, chunk_overlap=50
)
return loader.load_and_split(text_splitter)
@register