diff --git a/libs/community/langchain_community/document_loaders/parsers/pdf.py b/libs/community/langchain_community/document_loaders/parsers/pdf.py index 254849df802..eb65b73f1dc 100644 --- a/libs/community/langchain_community/document_loaders/parsers/pdf.py +++ b/libs/community/langchain_community/document_loaders/parsers/pdf.py @@ -311,6 +311,12 @@ class PyPDFParser(BaseBlobParser): ) elif xObject[obj]["/Filter"][1:] in _PDF_FILTER_WITH_LOSS: images.append(xObject[obj].get_data()) + elif ( + isinstance(xObject[obj]["/Filter"], list) + and xObject[obj]["/Filter"] + and xObject[obj]["/Filter"][0][1:] in _PDF_FILTER_WITH_LOSS + ): + images.append(xObject[obj].get_data()) else: warnings.warn("Unknown PDF Filter!") return extract_from_images_with_rapidocr(images)