mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-05 20:58:25 +00:00
Support GCS Objects with `/` in GCS Loaders (#3356)
This fixes the same problem as #1517, but for GCS.

### Problem

When loading GCS objects with `/` in the object key (e.g. `folder/some-document.txt`) using `GCSFileLoader`, the objects are downloaded into a temporary directory and saved as files. This fails when the object's parent directory does not exist within the temporary directory.

### What this PR does

Creates the parent directories based on the object key. This also works with deeply nested keys, e.g. `folder/subfolder/some-document.txt`.
This commit is contained in:
parent
a4d85f7fd5
commit
e6c1c32aff
@ -27,6 +27,10 @@ class GCSDirectoryLoader(BaseLoader):
|
|||||||
client = storage.Client(project=self.project_name)
|
client = storage.Client(project=self.project_name)
|
||||||
docs = []
|
docs = []
|
||||||
for blob in client.list_blobs(self.bucket, prefix=self.prefix):
|
for blob in client.list_blobs(self.bucket, prefix=self.prefix):
|
||||||
|
# we shall just skip directories since GCSFileLoader creates
|
||||||
|
# intermediate directories on the fly
|
||||||
|
if blob.name.endswith("/"):
|
||||||
|
continue
|
||||||
loader = GCSFileLoader(self.project_name, self.bucket, blob.name)
|
loader = GCSFileLoader(self.project_name, self.bucket, blob.name)
|
||||||
docs.extend(loader.load())
|
docs.extend(loader.load())
|
||||||
return docs
|
return docs
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
"""Loading logic for loading documents from a GCS file."""
|
"""Loading logic for loading documents from a GCS file."""
|
||||||
|
import os
|
||||||
import tempfile
|
import tempfile
|
||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
@ -34,6 +35,7 @@ class GCSFileLoader(BaseLoader):
|
|||||||
blob = bucket.blob(self.blob)
|
blob = bucket.blob(self.blob)
|
||||||
with tempfile.TemporaryDirectory() as temp_dir:
|
with tempfile.TemporaryDirectory() as temp_dir:
|
||||||
file_path = f"{temp_dir}/{self.blob}"
|
file_path = f"{temp_dir}/{self.blob}"
|
||||||
|
os.makedirs(os.path.dirname(file_path), exist_ok=True)
|
||||||
# Download the file to a destination
|
# Download the file to a destination
|
||||||
blob.download_to_filename(file_path)
|
blob.download_to_filename(file_path)
|
||||||
loader = UnstructuredFileLoader(file_path)
|
loader = UnstructuredFileLoader(file_path)
|
||||||
|
Loading…
Reference in New Issue
Block a user