mirror of
https://github.com/hwchase17/langchain.git
synced 2025-05-28 10:39:23 +00:00
wrap confluence attachment processing with a try-except block (#11503)
Prevents document loading from erroring out when an attachment is not found at the URL.

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
parent
17439daa6a
commit
c14a8df2ee
@@ -541,26 +541,33 @@ class ConfluenceLoader(BaseLoader):
|
||||
media_type = attachment["metadata"]["mediaType"]
|
||||
absolute_url = self.base_url + attachment["_links"]["download"]
|
||||
title = attachment["title"]
|
||||
if media_type == "application/pdf":
|
||||
text = title + self.process_pdf(absolute_url, ocr_languages)
|
||||
elif (
|
||||
media_type == "image/png"
|
||||
or media_type == "image/jpg"
|
||||
or media_type == "image/jpeg"
|
||||
):
|
||||
text = title + self.process_image(absolute_url, ocr_languages)
|
||||
elif (
|
||||
media_type == "application/vnd.openxmlformats-officedocument"
|
||||
".wordprocessingml.document"
|
||||
):
|
||||
text = title + self.process_doc(absolute_url)
|
||||
elif media_type == "application/vnd.ms-excel":
|
||||
text = title + self.process_xls(absolute_url)
|
||||
elif media_type == "image/svg+xml":
|
||||
text = title + self.process_svg(absolute_url, ocr_languages)
|
||||
else:
|
||||
continue
|
||||
texts.append(text)
|
||||
try:
|
||||
if media_type == "application/pdf":
|
||||
text = title + self.process_pdf(absolute_url, ocr_languages)
|
||||
elif (
|
||||
media_type == "image/png"
|
||||
or media_type == "image/jpg"
|
||||
or media_type == "image/jpeg"
|
||||
):
|
||||
text = title + self.process_image(absolute_url, ocr_languages)
|
||||
elif (
|
||||
media_type == "application/vnd.openxmlformats-officedocument"
|
||||
".wordprocessingml.document"
|
||||
):
|
||||
text = title + self.process_doc(absolute_url)
|
||||
elif media_type == "application/vnd.ms-excel":
|
||||
text = title + self.process_xls(absolute_url)
|
||||
elif media_type == "image/svg+xml":
|
||||
text = title + self.process_svg(absolute_url, ocr_languages)
|
||||
else:
|
||||
continue
|
||||
texts.append(text)
|
||||
except requests.HTTPError as e:
|
||||
if e.response.status_code == 404:
|
||||
print(f"Attachment not found at {absolute_url}")
|
||||
continue
|
||||
else:
|
||||
raise
|
||||
|
||||
return texts
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user