mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-24 15:43:54 +00:00
Dropbox Loader Documentation Update (#14047)
- **Description:** Update the documentation for the Dropbox loader and make the messages more verbose when loading PDF files, since people were getting confused. - **Issue:** #13952 - **Tag maintainer:** @baskaryan, @eyurtsev, @hwchase17 --------- Co-authored-by: Erick Friis <erick@langchain.dev>
This commit is contained in:
parent
a00db4b28f
commit
f3dd4a10cf
@ -15,7 +15,7 @@
|
||||
"1. Create a Dropbox app.\n",
|
||||
"2. Give the app these scope permissions: `files.metadata.read` and `files.content.read`.\n",
|
||||
"3. Generate access token: https://www.dropbox.com/developers/apps/create.\n",
|
||||
"4. `pip install dropbox` (requires `pip install unstructured` for PDF filetype).\n",
|
||||
"4. `pip install dropbox` (requires `pip install \"unstructured[pdf]\"` for PDF filetype).\n",
|
||||
"\n",
|
||||
"## Instructions\n",
|
||||
"\n",
|
||||
|
@ -3,7 +3,7 @@
|
||||
# 2. Give the app these scope permissions: `files.metadata.read`
|
||||
# and `files.content.read`.
|
||||
# 3. Generate access token: https://www.dropbox.com/developers/apps/create.
|
||||
# 4. `pip install dropbox` (requires `pip install unstructured` for PDF filetype).
|
||||
# 4. `pip install dropbox` (requires `pip install "unstructured[pdf]"` for PDF filetype).
|
||||
|
||||
|
||||
import os
|
||||
@ -118,11 +118,10 @@ class DropboxLoader(BaseLoader, BaseModel):
|
||||
try:
|
||||
text = response.content.decode("utf-8")
|
||||
except UnicodeDecodeError:
|
||||
print(f"File {file_path} could not be decoded as text. Skipping.")
|
||||
|
||||
file_extension = os.path.splitext(file_path)[1].lower()
|
||||
|
||||
if file_extension == ".pdf":
|
||||
print(f"File {file_path} type detected as .pdf")
|
||||
from langchain.document_loaders import UnstructuredPDFLoader
|
||||
|
||||
# Download it to a temporary file.
|
||||
@ -139,6 +138,10 @@ class DropboxLoader(BaseLoader, BaseModel):
|
||||
except Exception as pdf_ex:
|
||||
print(f"Error while trying to parse PDF {file_path}: {pdf_ex}")
|
||||
return None
|
||||
else:
|
||||
print(
|
||||
f"File {file_path} could not be decoded as pdf or text. Skipping."
|
||||
)
|
||||
|
||||
return None
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user