mirror of
https://github.com/hwchase17/langchain.git
synced 2025-05-22 15:38:06 +00:00
Support S3 object keys containing `/` in S3FileLoader (#1517)
Resolves https://github.com/hwchase17/langchain/issues/1510

### Problem

When loading S3 objects with `/` in the object key (e.g. `folder/some-document.txt`) using `S3FileLoader`, each object is downloaded into a temporary directory and saved as a file. The download fails when the key's parent directory does not exist inside the temporary directory. See https://github.com/hwchase17/langchain/issues/1510#issuecomment-1459583696 for how to reproduce this bug.

### What this PR does

Creates the parent directories implied by the object key before the file is downloaded. This also works with deeply nested keys such as `folder/subfolder/some-document.txt`.
This commit is contained in:
parent
aba58e9e2e
commit
784d24a1d5
@ -1,4 +1,5 @@
|
||||
"""Loading logic for loading documents from an s3 file."""
|
||||
import os
|
||||
import tempfile
|
||||
from typing import List
|
||||
|
||||
@ -27,6 +28,7 @@ class S3FileLoader(BaseLoader):
|
||||
s3 = boto3.client("s3")
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
file_path = f"{temp_dir}/{self.key}"
|
||||
os.makedirs(os.path.dirname(file_path), exist_ok=True)
|
||||
s3.download_file(self.bucket, self.key, file_path)
|
||||
loader = UnstructuredFileLoader(file_path)
|
||||
return loader.load()
|
||||
|
Loading…
Reference in New Issue
Block a user