mirror of
https://github.com/csunny/DB-GPT.git
synced 2025-08-18 08:17:38 +00:00
fix: word embedding update
1. Use Docx2txtLoader instead of UnstructuredWordDocumentLoader
This commit is contained in:
parent
dbf8b20c0b
commit
45fbcafbf6
@ -2,8 +2,8 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
from typing import List, Optional
|
from typing import List, Optional
|
||||||
|
|
||||||
from langchain.document_loaders import UnstructuredWordDocumentLoader
|
|
||||||
from langchain.schema import Document
|
from langchain.schema import Document
|
||||||
|
from langchain.document_loaders import Docx2txtLoader
|
||||||
from langchain.text_splitter import (
|
from langchain.text_splitter import (
|
||||||
SpacyTextSplitter,
|
SpacyTextSplitter,
|
||||||
RecursiveCharacterTextSplitter,
|
RecursiveCharacterTextSplitter,
|
||||||
@ -36,7 +36,7 @@ class WordEmbedding(SourceEmbedding):
|
|||||||
def read(self):
|
def read(self):
|
||||||
"""Load from word path."""
|
"""Load from word path."""
|
||||||
if self.source_reader is None:
|
if self.source_reader is None:
|
||||||
self.source_reader = UnstructuredWordDocumentLoader(self.file_path)
|
self.source_reader = Docx2txtLoader(self.file_path)
|
||||||
if self.text_splitter is None:
|
if self.text_splitter is None:
|
||||||
try:
|
try:
|
||||||
self.text_splitter = SpacyTextSplitter(
|
self.text_splitter = SpacyTextSplitter(
|
||||||
|
Loading…
Reference in New Issue
Block a user