From 52d6b91c18ca0c9374f9a465bc9af72547a55c38 Mon Sep 17 00:00:00 2001 From: Leonid Kuligin Date: Tue, 8 Aug 2023 15:34:43 +0200 Subject: [PATCH] Fixed a source for documents uploaded from GCS (#8912) Sets the existing `source` metadata entry of each document loaded from GCS to the file's GCS URI (gs://bucket/blob). #8911 Co-authored-by: Leonid Kuligin --- libs/langchain/langchain/document_loaders/gcs_file.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/libs/langchain/langchain/document_loaders/gcs_file.py b/libs/langchain/langchain/document_loaders/gcs_file.py index 10ba9a6f288..3dd6950393b 100644 --- a/libs/langchain/langchain/document_loaders/gcs_file.py +++ b/libs/langchain/langchain/document_loaders/gcs_file.py @@ -69,4 +69,8 @@ class GCSFileLoader(BaseLoader): # Download the file to a destination blob.download_to_filename(file_path) loader = self._loader_func(file_path) - return loader.load() + docs = loader.load() + for doc in docs: + if "source" in doc.metadata: + doc.metadata["source"] = f"gs://{self.bucket}/{self.blob}" + return docs