mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-24 23:54:14 +00:00
Dropbox Loader Documentation Update (#14047)
- **Description:** Updated the documentation for the Dropbox loader and made the messages more verbose when loading PDF files, since people were getting confused. - **Issue:** #13952 - **Tag maintainer:** @baskaryan, @eyurtsev, @hwchase17 --------- Co-authored-by: Erick Friis <erick@langchain.dev>
This commit is contained in:
parent
a00db4b28f
commit
f3dd4a10cf
@ -15,7 +15,7 @@
|
|||||||
"1. Create a Dropbox app.\n",
|
"1. Create a Dropbox app.\n",
|
||||||
"2. Give the app these scope permissions: `files.metadata.read` and `files.content.read`.\n",
|
"2. Give the app these scope permissions: `files.metadata.read` and `files.content.read`.\n",
|
||||||
"3. Generate access token: https://www.dropbox.com/developers/apps/create.\n",
|
"3. Generate access token: https://www.dropbox.com/developers/apps/create.\n",
|
||||||
"4. `pip install dropbox` (requires `pip install unstructured` for PDF filetype).\n",
|
"4. `pip install dropbox` (requires `pip install \"unstructured[pdf]\"` for PDF filetype).\n",
|
||||||
"\n",
|
"\n",
|
||||||
"## Instructions\n",
|
"## Instructions\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
@ -3,7 +3,7 @@
|
|||||||
# 2. Give the app these scope permissions: `files.metadata.read`
|
# 2. Give the app these scope permissions: `files.metadata.read`
|
||||||
# and `files.content.read`.
|
# and `files.content.read`.
|
||||||
# 3. Generate access token: https://www.dropbox.com/developers/apps/create.
|
# 3. Generate access token: https://www.dropbox.com/developers/apps/create.
|
||||||
# 4. `pip install dropbox` (requires `pip install unstructured` for PDF filetype).
|
# 4. `pip install dropbox` (requires `pip install unstructured[pdf]` for PDF filetype).
|
||||||
|
|
||||||
|
|
||||||
import os
|
import os
|
||||||
@ -118,11 +118,10 @@ class DropboxLoader(BaseLoader, BaseModel):
|
|||||||
try:
|
try:
|
||||||
text = response.content.decode("utf-8")
|
text = response.content.decode("utf-8")
|
||||||
except UnicodeDecodeError:
|
except UnicodeDecodeError:
|
||||||
print(f"File {file_path} could not be decoded as text. Skipping.")
|
|
||||||
|
|
||||||
file_extension = os.path.splitext(file_path)[1].lower()
|
file_extension = os.path.splitext(file_path)[1].lower()
|
||||||
|
|
||||||
if file_extension == ".pdf":
|
if file_extension == ".pdf":
|
||||||
|
print(f"File {file_path} type detected as .pdf")
|
||||||
from langchain.document_loaders import UnstructuredPDFLoader
|
from langchain.document_loaders import UnstructuredPDFLoader
|
||||||
|
|
||||||
# Download it to a temporary file.
|
# Download it to a temporary file.
|
||||||
@ -139,6 +138,10 @@ class DropboxLoader(BaseLoader, BaseModel):
|
|||||||
except Exception as pdf_ex:
|
except Exception as pdf_ex:
|
||||||
print(f"Error while trying to parse PDF {file_path}: {pdf_ex}")
|
print(f"Error while trying to parse PDF {file_path}: {pdf_ex}")
|
||||||
return None
|
return None
|
||||||
|
else:
|
||||||
|
print(
|
||||||
|
f"File {file_path} could not be decoded as pdf or text. Skipping."
|
||||||
|
)
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user