mirror of
https://github.com/hwchase17/langchain.git
synced 2025-05-23 16:08:10 +00:00
Support S3 Object keys with /
in S3FileLoader
(#1517)
Resolves https://github.com/hwchase17/langchain/issues/1510

### Problem

When loading S3 objects with `/` in the object key (e.g. `folder/some-document.txt`) using `S3FileLoader`, each object is downloaded into a temporary directory and saved as a file. This errors out when the key's parent directory does not exist within the temporary directory. See https://github.com/hwchase17/langchain/issues/1510#issuecomment-1459583696 for how to reproduce this bug.

### What this PR does

Creates the parent directories based on the object key before downloading the file. This also works with deeply nested keys such as `folder/subfolder/some-document.txt`.
This commit is contained in:
parent
aba58e9e2e
commit
784d24a1d5
@ -1,4 +1,5 @@
|
|||||||
"""Loading logic for loading documents from an s3 file."""
|
"""Loading logic for loading documents from an s3 file."""
|
||||||
|
import os
|
||||||
import tempfile
|
import tempfile
|
||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
@ -27,6 +28,7 @@ class S3FileLoader(BaseLoader):
|
|||||||
s3 = boto3.client("s3")
|
s3 = boto3.client("s3")
|
||||||
with tempfile.TemporaryDirectory() as temp_dir:
|
with tempfile.TemporaryDirectory() as temp_dir:
|
||||||
file_path = f"{temp_dir}/{self.key}"
|
file_path = f"{temp_dir}/{self.key}"
|
||||||
|
os.makedirs(os.path.dirname(file_path), exist_ok=True)
|
||||||
s3.download_file(self.bucket, self.key, file_path)
|
s3.download_file(self.bucket, self.key, file_path)
|
||||||
loader = UnstructuredFileLoader(file_path)
|
loader = UnstructuredFileLoader(file_path)
|
||||||
return loader.load()
|
return loader.load()
|
||||||
|
Loading…
Reference in New Issue
Block a user