Fix images parser

This commit is contained in:
Philippe Prados 2025-03-26 15:01:16 +01:00
parent 38b50e3277
commit 09c4c1f867

View File

@@ -45,13 +45,19 @@ class BaseImageBlobParser(BaseBlobParser):
"""
try:
from PIL import Image as Img
except ImportError:
raise ImportError(
"`Pillow` package not found, please install it with "
"`pip install Pillow`"
)
with blob.as_bytes_io() as buf:
if blob.mimetype == "application/x-npy":
try:
img = Img.fromarray(numpy.load(buf))
except EOFError:
return # Ignore too small images
array = numpy.load(buf)
if array.ndim == 3 and array.shape[2] == 1: # Grayscale image
img = Img.fromarray(numpy.squeeze(array, axis=2), mode="L")
else:
img = Img.fromarray(array)
else:
img = Img.open(buf)
content = self._analyze_image(img)
@@ -60,11 +66,6 @@ class BaseImageBlobParser(BaseBlobParser):
page_content=content,
metadata={**blob.metadata, **{"source": blob.source}},
)
except ImportError:
raise ImportError(
"`Pillow` package not found, please install it with "
"`pip install Pillow`"
)
class RapidOCRBlobParser(BaseImageBlobParser):