diff --git a/docs/docs/integrations/document_loaders/dropbox.ipynb b/docs/docs/integrations/document_loaders/dropbox.ipynb index 5c946f5eb76..83a785cc442 100644 --- a/docs/docs/integrations/document_loaders/dropbox.ipynb +++ b/docs/docs/integrations/document_loaders/dropbox.ipynb @@ -15,7 +15,7 @@ "1. Create a Dropbox app.\n", "2. Give the app these scope permissions: `files.metadata.read` and `files.content.read`.\n", "3. Generate access token: https://www.dropbox.com/developers/apps/create.\n", - "4. `pip install dropbox` (requires `pip install unstructured` for PDF filetype).\n", + "4. `pip install dropbox` (requires `pip install \"unstructured[pdf]\"` for PDF filetype).\n", "\n", "## Instructions\n", "\n", diff --git a/libs/langchain/langchain/document_loaders/dropbox.py b/libs/langchain/langchain/document_loaders/dropbox.py index 7a6d7dea7d6..7813181125f 100644 --- a/libs/langchain/langchain/document_loaders/dropbox.py +++ b/libs/langchain/langchain/document_loaders/dropbox.py @@ -3,7 +3,7 @@ # 2. Give the app these scope permissions: `files.metadata.read` # and `files.content.read`. # 3. Generate access token: https://www.dropbox.com/developers/apps/create. -# 4. `pip install dropbox` (requires `pip install unstructured` for PDF filetype). +# 4. `pip install dropbox` (requires `pip install unstructured[pdf]` for PDF filetype). import os @@ -118,11 +118,10 @@ class DropboxLoader(BaseLoader, BaseModel): try: text = response.content.decode("utf-8") except UnicodeDecodeError: - print(f"File {file_path} could not be decoded as text. Skipping.") - file_extension = os.path.splitext(file_path)[1].lower() if file_extension == ".pdf": + print(f"File {file_path} type detected as .pdf") from langchain.document_loaders import UnstructuredPDFLoader # Download it to a temporary file. @@ -139,6 +138,10 @@ class DropboxLoader(BaseLoader, BaseModel): except Exception as pdf_ex: print(f"Error while trying to parse PDF {file_path}: {pdf_ex}") return None + else: + print( + f"File {file_path} could not be decoded as pdf or text. Skipping." + ) return None