mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-02 13:08:57 +00:00
wrap confluence attachment processing with a try-except block (#11503)
Prevents document loading from erroring out when an attachment is not found at its download URL (HTTP 404); other HTTP errors are still raised. --------- Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
parent
17439daa6a
commit
c14a8df2ee
@@ -541,6 +541,7 @@ class ConfluenceLoader(BaseLoader):
|
|||||||
media_type = attachment["metadata"]["mediaType"]
|
media_type = attachment["metadata"]["mediaType"]
|
||||||
absolute_url = self.base_url + attachment["_links"]["download"]
|
absolute_url = self.base_url + attachment["_links"]["download"]
|
||||||
title = attachment["title"]
|
title = attachment["title"]
|
||||||
|
try:
|
||||||
if media_type == "application/pdf":
|
if media_type == "application/pdf":
|
||||||
text = title + self.process_pdf(absolute_url, ocr_languages)
|
text = title + self.process_pdf(absolute_url, ocr_languages)
|
||||||
elif (
|
elif (
|
||||||
@@ -561,6 +562,12 @@ class ConfluenceLoader(BaseLoader):
|
|||||||
else:
|
else:
|
||||||
continue
|
continue
|
||||||
texts.append(text)
|
texts.append(text)
|
||||||
|
except requests.HTTPError as e:
|
||||||
|
if e.response.status_code == 404:
|
||||||
|
print(f"Attachment not found at {absolute_url}")
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
raise
|
||||||
|
|
||||||
return texts
|
return texts
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user