mirror of
https://github.com/csunny/DB-GPT.git
synced 2025-10-22 17:39:02 +00:00
fork file replace import
This commit is contained in:
41
pilot/vector_store/file_loader.py
Normal file
41
pilot/vector_store/file_loader.py
Normal file
@@ -0,0 +1,41 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from langchain.prompts import PromptTemplate
|
||||
from langchain.vectorstores import Chroma
|
||||
from langchain.text_splitter import CharacterTextSplitter
|
||||
from langchain.document_loaders import UnstructuredFileLoader, UnstructuredPDFLoader
|
||||
|
||||
# Default value for BaseKnownLedgeQA.top_k.
# NOTE(review): presumably the number of vector-search hits to return — confirm against callers.
VECTOR_SEARCH_TOP_K = 5
|
||||
|
||||
class BaseKnownLedgeQA:
    """Skeleton for a knowledge-base QA component.

    Declares three class-level configuration slots (``llm``, ``embeddings``,
    ``top_k``) and a helper that loads a file into split documents.
    ``init_vector_store``, ``load_knownlege`` and ``_load_from_url`` are
    empty placeholders in this class.
    """

    # Presumably the language-model handle; unset (None) by default.
    llm: object = None
    # Presumably the embedding-model handle; unset (None) by default.
    embeddings: object = None

    # Defaults to the module-level VECTOR_SEARCH_TOP_K constant.
    top_k: int = VECTOR_SEARCH_TOP_K

    def __init__(self) -> None:
        """Create an instance; no per-instance state is set up here."""

    def init_vector_store(self):
        """Placeholder: does nothing and returns ``None``."""

    def load_knownlege(self):
        """Placeholder: does nothing and returns ``None``."""

    def _load_file(self, filename):
        """Load ``filename`` and return the result of splitting it.

        Every file goes through ``UnstructuredFileLoader``. Paths ending in
        ``.pdf`` (case-insensitive) use the loader's default arguments; all
        other paths are loaded with ``mode="elements"``. The loaded content
        is then split with a default-constructed ``CharacterTextSplitter``.

        Args:
            filename: Path of the file to load; must support ``str.lower()``.

        Returns:
            Whatever ``loader.load_and_split(splitter)`` returns.
        """
        # Load the file: only the loader's keyword arguments depend on the
        # extension, the splitting step is the same for both cases.
        is_pdf = filename.lower().endswith(".pdf")
        loader_options = {} if is_pdf else {"mode": "elements"}
        file_loader = UnstructuredFileLoader(filename, **loader_options)
        splitter = CharacterTextSplitter()
        return file_loader.load_and_split(splitter)

    def _load_from_url(self, url):
        """Placeholder: ignores ``url``, does nothing and returns ``None``."""
|
||||
|
Reference in New Issue
Block a user