Fix image parser

This commit is contained in:
Philippe Prados 2025-03-26 15:01:16 +01:00
parent 38b50e3277
commit 09c4c1f867

View File

@ -45,13 +45,19 @@ class BaseImageBlobParser(BaseBlobParser):
""" """
try: try:
from PIL import Image as Img from PIL import Image as Img
except ImportError:
raise ImportError(
"`Pillow` package not found, please install it with "
"`pip install Pillow`"
)
with blob.as_bytes_io() as buf: with blob.as_bytes_io() as buf:
if blob.mimetype == "application/x-npy": if blob.mimetype == "application/x-npy":
try: array = numpy.load(buf)
img = Img.fromarray(numpy.load(buf)) if array.ndim == 3 and array.shape[2] == 1: # Grayscale image
except EOFError: img = Img.fromarray(numpy.squeeze(array, axis=2), mode="L")
return # Ignore too small images else:
img = Img.fromarray(array)
else: else:
img = Img.open(buf) img = Img.open(buf)
content = self._analyze_image(img) content = self._analyze_image(img)
@ -60,11 +66,6 @@ class BaseImageBlobParser(BaseBlobParser):
page_content=content, page_content=content,
metadata={**blob.metadata, **{"source": blob.source}}, metadata={**blob.metadata, **{"source": blob.source}},
) )
except ImportError:
raise ImportError(
"`Pillow` package not found, please install it with "
"`pip install Pillow`"
)
class RapidOCRBlobParser(BaseImageBlobParser): class RapidOCRBlobParser(BaseImageBlobParser):